% ======================== 2016 ===========================================
@article{ key = {101}, author = {Ivan Candela and Gabriele Bavota and Barbara Russo and Rocco Oliveto}, title = {Using cohesion and coupling for software remodularization: is it enough?}, journal = {ACM Transactions on Software Engineering and Methodology}, publisher = {ACM Press}, year = {2016}, note = {[To appear]}, abstract = {Refactoring operations, and remodularization in particular, can be performed to repair the design of a software system and remove the erosion caused by software evolution. Various approaches have been proposed to support developers during the remodularization of a software system. Most of these approaches are based on the underlying assumption that developers pursue an optimal balance between quality metrics, such as cohesion and coupling, when modularizing the classes of their systems. Thus, a remodularization recommender proposes a solution that implicitly provides a (near) optimal balance between such quality metrics. However, there is still a lack of empirical evidence that such a balance is what developers actually desire. This paper aims at bridging this gap by analyzing the aforementioned phenomenon both objectively and subjectively. Specifically, we present the results of (i) a large study analyzing the modularization quality, in terms of package cohesion and coupling, of 100 open source systems, and (ii) a survey conducted with 34 developers aimed at understanding the driving factors they consider when performing modularization tasks. The results achieved have been used to distill a set of lessons learned that might be considered when designing more effective remodularization recommenders.} }
@article{ key = {100}, author = {Luca Ponzanelli and Gabriele Bavota and Massimiliano Di Penta and Rocco Oliveto and Michele Lanza}, title = {Turning the IDE into a self-confident programming assistant}, journal = {Empirical Software Engineering}, publisher = {Springer}, year = {2016}, note = {[To appear]}, abstract = {Developers often require knowledge beyond what they already possess, which boils down to asking co-workers for help or consulting additional sources of information, such as Application Programming Interface (API) documentation, forums, and Q&A websites. However, it requires time and energy to formulate one's problem and to peruse and process the results. We propose a novel approach that, given a context in the Integrated Development Environment (IDE), automatically retrieves pertinent discussions from Stack Overflow, evaluates their relevance using a multi-faceted ranking model, and, if a given confidence threshold is surpassed, notifies the developer. We have implemented our approach in Prompter, an Eclipse plug-in. Prompter was evaluated in two empirical studies. The first study was aimed at evaluating Prompter's ranking model and involved 33 participants. The second study was conducted with 12 participants and aimed at evaluating Prompter's usefulness when supporting developers during development and maintenance tasks. Since Prompter uses ``volatile information'' crawled from the web, we also replicated Study I after one year to assess the impact of such ``volatility'' on recommenders like Prompter. Our results indicate that (i) Prompter recommendations were positively evaluated in 74% of the cases on average, (ii) Prompter significantly helps developers to improve the correctness of their tasks by 24% on average, but also (iii) 78% of the provided recommendations are ``volatile'' and can change within one year. While Prompter proved to be effective, our studies also point out issues that arise when building recommenders based on information available in online forums.} }
@inproceedings{DBLP:conf/issta/PalombaPZOL16, key = {99}, author = {Fabio Palomba and Annibale Panichella and Andy Zaidman and Rocco Oliveto and Andrea De Lucia}, title = {Automatic test case generation: what if test code quality matters?}, booktitle = {Proceedings of the 25th International Symposium on Software Testing and Analysis, {ISSTA} 2016, Saarbr{\"{u}}cken, Germany, July 18-20, 2016}, pages = {130--141}, year = {2016}, crossref = {DBLP:conf/issta/2016}, url = {http://doi.acm.org/10.1145/2931037.2931057}, doi = {10.1145/2931037.2931057}, timestamp = {Thu, 14 Jul 2016 15:39:00 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/issta/PalombaPZOL16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {Test case generation tools that optimize code coverage have been extensively investigated. Recently, researchers have suggested adding other non-coverage criteria, such as memory consumption or readability, to increase the practical usefulness of generated tests. In this paper, we observe that test code quality metrics, and test cohesion and coupling in particular, are valuable candidates as additional criteria. Indeed, tests with low cohesion and/or high coupling have been shown to have a negative impact on future maintenance activities. In an exploratory investigation we show that most generated tests are indeed affected by poor test code quality. For this reason, we incorporate cohesion and coupling metrics into the main loop of a search-based algorithm for test case generation. Through an empirical study we show that our approach is not only able to generate tests that are more cohesive and less coupled, but can also (i) increase branch coverage by up to 10% when enough time is given to the search and (ii) result in statistically shorter tests.} }
@inproceedings{DBLP:conf/iwpc/ScalabrinoVPO16, key = {98}, author = {Simone Scalabrino and Mario Linares V{\'{a}}squez and Denys Poshyvanyk and Rocco Oliveto}, title = {Improving code readability models with textual features}, booktitle = {24th {IEEE} International Conference on Program Comprehension, {ICPC} 2016, Austin, TX, USA, May 16-17, 2016}, pages = {1--10}, year = {2016}, crossref = {DBLP:conf/iwpc/2016}, url = {http://dx.doi.org/10.1109/ICPC.2016.7503707}, doi = {10.1109/ICPC.2016.7503707}, timestamp = {Thu, 21 Jul 2016 14:57:20 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/iwpc/ScalabrinoVPO16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {Code reading is one of the most frequent activities in software maintenance; before implementing changes, it is necessary to fully understand source code often written by other developers. Thus, readability is a crucial aspect of source code that might significantly influence program comprehension effort. In general, models used to estimate software readability take into account only structural aspects of source code, e.g., line length and the number of comments.
However, code is a particular form of text; therefore, a code readability model should not ignore the textual aspects of source code encapsulated in identifiers and comments. In this paper, we propose a set of textual features that could be used to measure code readability. We evaluated the proposed textual features on 600 code snippets manually evaluated (in terms of readability) by 5K+ people. The results show that the proposed features complement classic structural features when predicting readability judgments. Consequently, a code readability model based on a richer set of features, including the ones proposed in this paper, achieves significantly higher accuracy than all the state-of-the-art readability models.} }
@inproceedings{DBLP:conf/iwpc/PalombaPLOZ16, key = {97}, author = {Fabio Palomba and Annibale Panichella and Andrea De Lucia and Rocco Oliveto and Andy Zaidman}, title = {A textual-based technique for Smell Detection}, booktitle = {24th {IEEE} International Conference on Program Comprehension, {ICPC} 2016, Austin, TX, USA, May 16-17, 2016}, pages = {1--10}, year = {2016}, crossref = {DBLP:conf/iwpc/2016}, url = {http://dx.doi.org/10.1109/ICPC.2016.7503704}, doi = {10.1109/ICPC.2016.7503704}, timestamp = {Thu, 21 Jul 2016 14:57:20 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/iwpc/PalombaPLOZ16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {In this paper, we present TACO (Textual Analysis for Code Smell Detection), a technique that exploits textual analysis to detect a family of smells of different nature and at different levels of granularity. We run TACO on 10 open source projects, comparing its performance with existing smell detectors purely based on structural information extracted from code components. The analysis of the results indicates that TACO's precision ranges between 67% and 77%, while its recall ranges between 72% and 84%. Also, TACO often outperforms alternative structural approaches, confirming, once again, the usefulness of the information that can be derived from the textual part of code components.} }
@inproceedings{DBLP:conf/icse/PalombaNPOL16, key = {96}, author = {Fabio Palomba and Dario Di Nucci and Annibale Panichella and Rocco Oliveto and Andrea De Lucia}, title = {On the diffusion of test smells in automatically generated test code: an empirical study}, booktitle = {Proceedings of the 9th International Workshop on Search-Based Software Testing, SBST@ICSE 2016, Austin, Texas, USA, May 14-22, 2016}, pages = {5--14}, year = {2016}, crossref = {DBLP:conf/icse/2016sbst}, url = {http://doi.acm.org/10.1145/2897010.2897016}, doi = {10.1145/2897010.2897016}, timestamp = {Wed, 20 Jul 2016 09:47:06 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/PalombaNPOL16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {The role of software testing in the software development process is widely recognized as a key activity for successful projects. This is the reason why, in the last decade, several automatic unit test generation tools have been proposed, focusing particularly on high code coverage. Despite the effort spent by the research community, there is still a lack of empirical investigation aimed at analyzing the characteristics of the produced test code. Indeed, while some studies inspected the effectiveness and the usability of these tools in practice, it is still unknown whether the generated test code is maintainable.
In this paper, we conducted a large-scale empirical study in order to analyze the diffusion of bad design solutions, namely test smells, in automatically generated unit test classes. Results of the study show the high diffusion of test smells as well as the frequent co-occurrence of different types of design problems. Finally, we found that all test smells have a strong positive correlation with structural characteristics of the systems, such as size or number of classes.} }
@inproceedings{DBLP:conf/icse/PonzanelliBMPOH16, key = {95}, author = {Luca Ponzanelli and Gabriele Bavota and Andrea Mocci and Massimiliano Di Penta and Rocco Oliveto and Mir Anamul Hasan and Barbara Russo and Sonia Haiduc and Michele Lanza}, title = {Too long; didn't watch!: extracting relevant fragments from software development video tutorials}, booktitle = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016, Austin, TX, USA, May 14-22, 2016}, pages = {261--272}, year = {2016}, crossref = {DBLP:conf/icse/2016}, url = {http://doi.acm.org/10.1145/2884781.2884824}, doi = {10.1145/2884781.2884824}, timestamp = {Mon, 16 May 2016 10:25:36 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/PonzanelliBMPOH16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {When facing difficulties in solving a task at hand, and knowledgeable colleagues are not available, developers resort to offline and online resources, e.g., official documentation, third-party tutorials, mailing lists, and Q&A websites. These, however, need to be found, read, and understood, which takes its toll in terms of time and mental energy. A more immediate and accessible resource is video tutorials found on the web, which in recent years have seen a steep increase in popularity. Nonetheless, videos are an intrinsically noisy data source, and finding the right piece of information might be even more cumbersome than using the previously mentioned resources. We present CodeTube, an approach which mines video tutorials found on the web and enables developers to query their contents. The video tutorials are processed and split into coherent fragments, so that only fragments related to the query are returned. As an added benefit, the relevant video fragments are complemented with information from additional sources, such as Stack Overflow discussions. The results of two studies to assess CodeTube indicate that video tutorials, if appropriately processed, represent a useful, yet still under-utilized source of information for software development.} }
@inproceedings{DBLP:conf/icse/VillarroelBROP16, key = {94}, author = {Lorenzo Villarroel and Gabriele Bavota and Barbara Russo and Rocco Oliveto and Massimiliano Di Penta}, title = {Release planning of mobile apps based on user reviews}, booktitle = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016, Austin, TX, USA, May 14-22, 2016}, pages = {14--24}, year = {2016}, crossref = {DBLP:conf/icse/2016}, url = {http://doi.acm.org/10.1145/2884781.2884818}, doi = {10.1145/2884781.2884818}, timestamp = {Sun, 15 May 2016 11:55:22 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/VillarroelBROP16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {Developers have to constantly improve their apps by fixing critical bugs and implementing the most desired features in order to gain shares in the continuously increasing and competitive market of mobile apps. A precious source of information for planning such activities is represented by the reviews left by users on the app store. However, in order to exploit such information, developers need to manually analyze such reviews. This is not feasible when, as frequently happens, the app receives hundreds of reviews per day. In this paper, we introduce CLAP (Crowd Listener for releAse Planning), a thorough solution to (i) categorize user reviews based on the information they carry (e.g., bug reporting), (ii) cluster together related reviews (e.g., all reviews reporting the same bug), and (iii) automatically prioritize the clusters of reviews to be implemented when planning the subsequent app release. We evaluated all the steps behind CLAP, showing its high accuracy in categorizing and clustering reviews and the meaningfulness of the recommended prioritizations. Also, given the availability of CLAP as a working tool, we assessed its practical applicability in industrial environments.} }
@inproceedings{DBLP:conf/icse/PonzanelliBMPOR16, key = {93}, author = {Luca Ponzanelli and Gabriele Bavota and Andrea Mocci and Massimiliano Di Penta and Rocco Oliveto and Barbara Russo and Sonia Haiduc and Michele Lanza}, title = {CodeTube: extracting relevant fragments from software development video tutorials}, booktitle = {Proceedings of the 38th International Conference on Software Engineering, {ICSE} 2016, Austin, TX, USA, May 14-22, 2016 - Companion Volume}, pages = {645--648}, year = {2016}, crossref = {DBLP:conf/icse/2016c}, url = {http://doi.acm.org/10.1145/2889160.2889172}, doi = {10.1145/2889160.2889172}, timestamp = {Sun, 15 May 2016 12:23:10 +0200}, biburl = {http://dblp.uni-trier.de/rec/bib/conf/icse/PonzanelliBMPOR16}, bibsource = {dblp computer science bibliography, http://dblp.org}, abstract = {Nowadays, developers heavily rely on sources of informal documentation, including Q&A forums, slides, or video tutorials, the latter being particularly useful for providing introductory notions for a piece of technology. The current practice is that developers have to browse sources individually, which in the case of video tutorials is cumbersome, as they are lengthy and cannot be searched based on their contents. We present CodeTube, a Web-based recommender system that analyzes the contents of video tutorials and is able to provide, given a query, cohesive and self-contained video fragments, along with links to relevant Stack Overflow discussions. CodeTube relies on a combination of textual analysis and image processing applied to video tutorial frames and speech transcripts to split videos into cohesive fragments, index them, and identify related Stack Overflow discussions.} }
@inproceedings{ key = {89}, author = {Fiammetta Marulli and Remo Pareschi and Daniele Baldacci}, title = {The Internet of Speaking Things and Its Applications to Cultural Heritage}, booktitle = {Proceedings of the International Conference on Internet of Things and Big Data, Rome, Italy, April 2016}, year = {2016}, url = {http://dx.doi.org/10.5220/0005877701070117}, doi = {10.5220/0005877701070117}, abstract = {The initial driver for the development of the Internet of Things (IoT) was to provide an infrastructure capable of turning anything into a sensor that acquires and pours data into the cloud, where they can be aggregated with other data and analysed to extract decision-supportive information. The validity of this initial motivation still stands. However, going in the opposite direction is at least as useful and exciting: exploiting the Internet to make things communicate and speak, thus complementing their capability to sense and listen. In this work, we present IoT applications aimed at supporting Cultural Heritage environments, but also suitable for Tourism and Smart Urban environments, that advance the available smart-device-based user experience via interaction with speaking things. First, we describe a system architecture for speaking things, comprising the basic communication protocols for carrying information to the user as well as higher-level functionalities for content generation and dialogue management. We then show how this architecture is applied to make artworks speak to people. Finally, we introduce speaking holograms as an even more advanced and interactive application.} }