% Conference paper (it has authors and a page range), so it is typed
% @inproceedings with the proceedings name in booktitle rather than journal.
% The conference dates formerly appended to address are still recorded in note.
@inproceedings{Distefano2010265,
  author    = {Distefano, Salvatore and Longo, Francesco and Scarpa, Marco},
  title     = {Availability assessment of {HA} standby redundant clusters},
  booktitle = {Proceedings of the 29th {IEEE} Symposium on Reliable Distributed Systems ({SRDS})},
  year      = {2010},
  pages     = {265--274},
  publisher = {IEEE Computer Society},
  address   = {New Delhi, India},
  isbn      = {978-0-7695-4250-8},
  issn      = {1060-9857},
  doi       = {10.1109/SRDS.2010.37},
  url       = {http://www.scopus.com/inward/record.url?eid=2-s2.0-78650560407\&partnerID=40\&md5=9666fbfeaaae60d0a7d7ca8f388600e7},
  note      = {cited By 14; Conference of 29th IEEE Symposium on Reliable Distributed Systems, SRDS 2010 ; Conference Date: 31 October 2010 Through 3 November 2010; Conference Code:82914},
  keywords  = {Aging process, Algebra, Availability assessment, Computing system, Critical tasks, Dynamic reliability, Engineering perspective, High availability, High-availability clusters, Human interactions, Kronecker algebra, Maintainability, Maintenance strategies, Markov Chain, Markov processes, Modern technologies, phase type distributions, Possible solutions, Quality assurance, Redundancy, Redundant system, Standby redundancy, State-space explosion, System availability, Wear and tear},
  abstract  = {Computing systems are becoming the heart of modern technology, implementing critical tasks usually demanded to and implying human interactions. This highlights the problem of dependability in computer science contexts. High availability computing/clusters is a possible solution in such cases, implementing standby redundancy as a tradeoff between dependability and costs. From the engineering perspective, this implies the use of specific techniques and tools for adequately evaluating the reliability/availability of high availability clusters, also taking into account dependencies among nodes (standby, repair, etc.) and the effect of wear and tear into such nodes, especially when failure and repair times are not exponentially distributed. The solution proposed in this paper is based on the use of phase type distributions and Kronecker algebra. In fact, we represent the reliability and maintainability of each component by specific phase type distributions, whose interactions describe the system availability. This latter is thus modeled by an expanded Markov chain expressed in terms of Kronecker algebra in order to face the state space explosion problem of expansion techniques and to represent the memory policies related to the aging process. More specifically, the paper firstly details the technique and then applies it to the evaluation of a standby redundant system representing a high availability cluster taken as example with the aim of demonstrating its effectiveness. Moreover, in order to show the potentiality of the technique, different maintenance strategies are evaluated and therefore compared. {\textcopyright} 2010 IEEE.},
}