\documentclass[conference,twoside]{IEEEtran}
\IEEEoverridecommandlockouts
%\overrideIEEEmargins
\usepackage{stfloats}
\usepackage{cite}
%\usepackage{epsf}
%\usepackage[final]{ieeefig}
\usepackage{amsmath,amsthm,amssymb,amsfonts}
\interdisplaylinepenalty=2500
\usepackage{color}
\usepackage{float}
%\usepackage{subfigure}
\usepackage{verbatim}
%\usepackage{epsf}
\usepackage{hyperref}
\usepackage{moreverb}
\usepackage{pifont}
\usepackage{psfrag}
\usepackage{epsfig,graphicx,subfigure,epstopdf}
\newtheorem{theorem}{Theorem}
\newtheorem{lemma}{Lemma}
\newtheorem{prop}[theorem]{Proposition}
\newtheorem{claim}[theorem]{Claim}
\newtheorem{cor}[theorem]{Corollary}
\newtheorem{defn}{Definition}[section]
\newtheorem{example}{Example}[section]
\newtheorem{obs}{Observation}
\newtheorem{remark}{Remark}
\newcommand{\mtx}[1]{{\mathrm{#1}}}
\newcommand{\e}[2]{e^{(#1)}_{#2}}
\newcommand{\z}[2]{z^{(#1)}_{#2}}
\newcommand{\un}[1]{\underline{#1}}
\newcommand{\bb}[1]{\mathbb{#1}}
\newcommand{\one}[1]{1_{\{#1\}}}
\newcommand{\mysum}[2]{\sum_{#1}^{#2}}
\newcommand{\myprod}[2]{\prod_{#1}^{#2}}
\newcommand{\ex}[1]{\E\left[#1\right]}
\newcommand{\condex}[2]{\E\left[#1|#2\right]}
\newcommand{\exwrt}[2]{\E_{#1}\left[#2\right]}
\newcommand{\pr}[1]{ {\sf P}\{#1\}}
\newcommand{\cpr}[2]{\text{Pr}\{#1|#2\}}
\newcommand{\la}{|\lambda |}
\newcommand{\A}{\mathcal{A}}
\newcommand{\C}{\mathcal{C}}
\newcommand{\U}{\mathcal{U}}
\newcommand{\V}{\mathcal{V}}
\newcommand{\hx}{\hat{x}}
\newcommand{\bx}{\bar{x}}
\DeclareMathOperator\E{\sf E}
\newcommand{\ab}{{\boldsymbol \alpha}}
\newcommand{\R}{\mathbb{R}}
\newcommand{\tra}{\mathsf{T}}
\graphicspath{{figs/}}
\allowdisplaybreaks
\begin{document}
\date{}
\title{Anytime Capacity of Markov Channels}
\author{
\IEEEauthorblockN{Paolo~Minero}
\IEEEauthorblockA{Department of EE\\
University of Notre Dame\\
Notre Dame, IN 46556, USA\\
Email: [email protected]}
\and
\IEEEauthorblockN{Massimo Franceschetti}
\IEEEauthorblockA{Department of ECE\\
University of California, San Diego\\
La Jolla, CA 92093, USA\\
Email: [email protected]}
}
%\author{Paolo~Minero~\IEEEmembership{Member, IEEE} and Massimo Franceschetti~\IEEEmembership{Senior Member, IEEE}
%~\thanks{Paolo Minero is with the Department of Electrical Engineering, University of Notre Dame, Notre Dame, IN, 46556, USA and also with Qualcomm Inc., San Diego, CA, 92121
%(Email: [email protected]).}
%~\thanks{M. Franceschetti is with the Dept. of Electrical and Computer Engineering
%University of California at San Diego,
%La Jolla, California 92093--0407.
%Email: [email protected]}}
\maketitle
\IEEEpeerreviewmaketitle
\begin{abstract}
Several new expressions for the anytime capacity of Sahai and Mitter are presented for a time-varying rate-limited channel with noiseless output feedback. These follow from a parametric characterization obtained in the case of Markov channels, and include an explicit formula for the $r$-bit Markov erasure channel, as well as formulas for memoryless rate processes including Binomial, Poisson, and Geometric distributions. Besides the memoryless erasure channel and the additive white Gaussian noise channel with input power constraint, these are the only cases in which explicit anytime capacity formulas are known. At the basis of these results is the study of a threshold function, depending on $m$ and on the channel parameters, for $m$th moment stabilization of a scalar linear system controlled over a Markov time-varying digital feedback channel.
This threshold is shown to be a continuous and strictly decreasing function of $m$, and to have as extreme values the Shannon capacity and the zero-error capacity as $m$ tends to zero and infinity, respectively. Its operational interpretation is that of an achievable communication rate, subject to a reliability constraint.
%that depends on the moment $m$.
%The stochastic stability of a scalar linear system controlled over a Markov time-varying digital feedback channel with noiseless output feedback is considered. It is shown that the $m$th moment of the closed-loop system can be kept bounded if and only if the rate of growth of the unstable open-loop system is smaller than a threshold that depends on $m$ and on the channel parameters. This threshold is shown to be a continuous and strictly decreasing function of $m$ and to have as extreme values the Shannon capacity and the zero-error capacity as $m$ tends to zero and infinity, respectively. Its operational interpretation is that of achievable communication rate, subject to a varying reliability constraint that depends on the moment $m$.
%Using this result, a parametric characterization of Sahai's anytime capacity for a class of Markov channels is provided. An explicit expression for the anytime capacity of an $r$-bit Markov erasure channel is obtained, which generalizes the one for the memoryless case obtained by Sahai in parametric, and by Xu and Sahai in explicit form. In the memoryless case, a general expression of the anytime capacity is obtained in terms of the moment generating function of the rate process, and this is computed explicitly for the Binomial, Poisson, and Geometric distributions.
% For a two-state memoryless communication channel, a converse to the achievable scheme of Y\"{u}ksel and Meyn is provided, and when the rate process can take values in $\{0,\infty\}$ results for the packet erasure model are recovered. Finally, the anytime error exponent is compared to the error exponent of variable-length block-codes over Markov channels with feedback of Como, Y\"{u}ksel, and Tatikonda.
%The proofs rely on a novel necessary and sufficient condition for the stochastic stability of Markov jump linear systems. The sufficient condition is obtained via the idea of subsampling, while the necessary condition is based on the maximum entropy theorem and the entropy power inequality.
\end{abstract}
%\begin{IEEEkeywords}
%Anytime capacity, entropy power inequality, Markov channels, Markov jump linear systems, quantized control.
%\end{IEEEkeywords}
\section{Introduction}
We consider the problem of moment stabilization of a dynamical system where the estimated state is transmitted for control over a time-varying communication channel.
%This problem has been studied extensively in the context of networked control systems and discussed in several special issue journals dedicated to the topic~\cite{Baillieul--Antsaklis20071,Baillieul--Antsaklis20072,Franceschetti-Kumar-Mitter-Teneketzis-2008}.
Recently, the problem gained renewed attention due to its relevance for the design of cyberphysical systems~\cite{Kim-Kumar2012}. A tutorial review of the problem with extensive references appears in~\cite{Franceschetti-Minero-2014}. The notion of Shannon capacity is in general not sufficient to characterize the trade-off between the entropy production rate of the plant, expressed by the growth of the state space spanned in open loop, and the communication rate required for its stabilization.
A large Shannon capacity is useless for stabilization if it cannot be used in time for control. For the control signal to be effective, it must be appropriate to the current state of the system. Since decoding a wrong codeword means applying a wrong control signal and driving the system away from stability, the effectiveness of the control depends on the history of whether previous codewords were decoded correctly or not. In essence, the stabilization problem is an example of \emph{interactive communication}, where two-way communication occurs through the feedback loop between the plant and the controller.
%Error correcting codes developed independently in this context~\cite{Forney1974,Schulman1996,Ostrovsky-etal-2009} have a natural tree structure representing past history and are natural candidates to be used for control.
Alternative capacity notions with stronger reliability constraints than a vanishing probability of error
%and requiring these types of coding schemes
have been proposed in the context of control, including the zero-error capacity~\cite{matveed-savkin2007b}, originally introduced by Shannon~\cite{Shannon1956}, and the anytime capacity proposed by Sahai and Mitter~\cite{Sahai2001}, \cite{Como-Fagnani-Zampieri2010,Sahai-Mitter2006,simsek-jain-varaija2004,Xu2005}.
Within this general framework, we focus on the $m$th moment stabilization of an unstable scalar system whose state is communicated over a rate-limited channel capable of supporting $R_k$ bits at each time step and evolving randomly in a Markovian fashion; see Fig.~\ref{fig:scheme}. The rate process is known causally to both encoder and decoder. Many variations of this ``bit-pipe'' model have been studied in the literature
~\cite{Wong-Brocket1999,Brocket-Liberzon2000,Liberzon2003,Delchamps1990,Martins-Dehleh-Elia2006,Nair-Evans-2004,Imer-etal-2006,Tatikonda-Mitter2004,Tatikonda-Mitter2004b,Gupta-Martins2010,Yuksel2010,Yuksel-Basar2011,Borkar-Mitter-1997,Minero-etal-2009,Coviello-etal2012,Gupta-etal-2007,Gupta-etal-2009,Schenato-etal-2007,Huang-Dey-2007,Elia2005,Mitter-Elia2001}, including the fixed-rate channel; the erasure channel, where the rate process can assume the value zero; and the packet loss channel, where the rate process can oscillate randomly between zero and infinity, allowing a real number of infinite precision to be transported across the channel in one time step. Connections between the rate-limited and the packet loss channel have been pointed out in~\cite{Minero-etal-2009,Coviello-etal2012}, showing that results for the latter model can be recovered by appropriate limiting arguments.
%The additive white Gaussian channel has been considered in~\cite{Tatikonda-etal2004,Braslavsky-Middleton-Freudenberg2007,Middleton-Roja2009,Freudenberg-Middleton-Solo2010,Goodwin-Quevedo2010} and in this case the Shannon capacity is indeed sufficient to express the rate needed for stabilization.
Extensions to the additive colored Gaussian channel~\cite{Ardestanizadeh--franceschetti2012} show that the maximum ``tolerable instability'' --- expressed by the sum of the logarithms of the unstable eigenvalues of the system that can be stabilized by a linear controller with a given power constraint over a stationary Gaussian channel --- corresponds to the Shannon feedback capacity~\cite{Kim2010}, which assumes the presence of a noiseless feedback link between the output and the input of the channel and is subject to the same power constraint. This result suggests a duality between the problems of control and communication in the presence of feedback, and indeed it has been shown that efficient feedback communication schemes can be obtained by solving a corresponding control problem~\cite{Elia2004,Ardestanizadeh-etal-2012}.

The major contribution of this paper is the introduction of a stability threshold function of the channel parameters and of the moment stability number $m$ that converges to the Shannon capacity as $m \rightarrow 0$, converges to the zero-error capacity as $m \rightarrow \infty$, and provides a parametric characterization of the anytime capacity for the intermediate values of $m$. This function yields a novel anytime capacity formula in the special case of the $r$-bit Markov erasure channel, as well as formulas for memoryless rate processes including Binomial, Poisson, and Geometric distributions.
%To prove our results, we require some novel extensions of the theory of Markov Jump Linear Systems (MJLS), which are of independent value.
%On the technical side, the sufficient condition for stability is obtained exploiting the idea of subsampling, while the necessary condition is based on the maximum entropy theorem and the entropy power inequality.
%In passing, although we do not deal with the case of vector systems directly, we point out that our results can be extended to this case exploiting usual bit-allocation techniques outlined in~\cite{Minero-etal-2009,Coviello-etal2012}, at the expense of a more technical treatment that does not add much to the engineering insight.
%The rest of the paper is organized as follows. Section~II describes the system and channel model and introduces the stability threshold function, illustrating some of its properties. Section~III describes relationships with the anytime capacity, and provides some representative examples. Section~IV provides the formula for the anytime capacity of the Markov erasure channel, and Section V considers the memoryless case. All proofs and auxiliary results are given in the appendix.
Throughout the paper, the following notation is used. Logarithms are in base two; random variables are denoted by uppercase letters and their realizations by lowercase letters; matrices are denoted by uppercase letters in the special typeset $\mtx{A}$.
%All proofs and auxiliary results are given in the appendix.
\section{Moment Stabilization over Markov Channels}
\label{sec:scalar}
We consider the stability of linear dynamical systems when the estimated state is sent to the controller over a digital communication link whose state is described by a Markov process, as depicted in Fig.~\ref{fig:scheme}.
\begin{figure}
\begin{center}
\psfrag{C}[c]{Controller}
\psfrag{P}[c]{Plant}
\psfrag{E}[c]{Estimator}
\psfrag{N}[c]{Encoder}
\psfrag{D}[c]{Decoder}
\psfrag{S}[c]{Channel state}
\psfrag{R}[c]{$R_k$}
\psfrag{I}[c]{information}
\psfrag{d4}[c]{}
\includegraphics[scale=0.3]{scheme3.eps}
\end{center}
\caption{Feedback loop model. The estimated state is quantized, encoded, and sent to a decoder over a digital channel of state $R_k$ that evolves in time according to a Markov process.
%The rate process $\{R_k\}_{k\ge 0}$ evolves as a Markov chain and is known causally to both encoder and decoder.
}
\label{fig:scheme}
\end{figure}
\subsection{System model}
%\label{sec:scalarmain}
Consider the scalar dynamical system
\begin{align}
\label{eq:scalsys}
x_{k+1} & =\lambda x_k+u_k+v_k, \nonumber \\
y_k &=x_k+w_k,
\end{align}
where $k \in \mathbb{N}$ and $\la \geq 1$. The variable $x_k$ represents the state of the system, $u_k$ the control input, $v_k$ an additive stochastic disturbance, $y_k$ the sensor measurement, and $w_k$ the measurement noise. The disturbance and the noise are independent of each other and of the initial condition $x_0$; they are also independent of the channel state, defined below.
\subsection{Channel model}
The state observer is connected to the actuator through a noiseless digital communication link that at each time $k$ allows the transmission of $R_k$ bits without error. The rate process $\{R_k\}_{k\geq 0}$ is modeled as a homogeneous positive-recurrent Markov process defined on the finite set
\begin{align}
\label{eq:support}
\mathcal{R} = \{\bar{r}_1,\ldots,\bar{r}_n\},
\end{align}
for integers $0 \le \bar{r}_1 < \dots < \bar{r}_n$, and with one-step transition probability matrix $\mtx{P}$ having entries
\begin{align}
\label{eq:P}
p_{ij} = \pr{R_k = \bar{r}_j| R_{k-1} = \bar{r}_i}
\end{align}
for every $i,j \in \{1,\dotsc,n\}$. In the sequel, we define $\mtx{R} \in \mathbb{Z}_+^{n\times n}$ as the diagonal matrix with diagonal entries $\bar{r}_1 , \dots , \bar{r}_n$, i.e.,
\begin{align}
\label{eq:Rdiag}
\mtx{R} = \text{diag}(\bar{r}_1 , \dots , \bar{r}_n).
\end{align}
Encoder and decoder are assumed to have causal knowledge of the rate process.
%In the information theory literature, a communication channel is defined by a triple $(\mathcal{X},p(y|x),\mathcal{Y})$ that consists of an input set $\mathcal{X}$, an output set $\mathcal{Y}$ and a transition probability matrix $p(y|x)$ for every $x \in \mathcal{X}$ and $y \in \mathcal{Y}$.
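As a quick illustration of this channel model, the following minimal Python sketch (ours, not part of the original text; the rate set and transition matrix are made-up values) simulates a sample path of the rate process $\{R_k\}$ and compares the empirical state occupancy with the stationary distribution $(\pi_1,\dotsc,\pi_n)$ of $\mtx{P}$ that appears in the capacity formulas below.
\begin{verbatim}
# Illustrative sketch: simulate the Markov rate process {R_k}
# and compare the empirical occupancy of each state with the
# stationary distribution pi.  Parameter values are made up.
import numpy as np

rates = np.array([0, 2, 4])            # support {r_1, ..., r_n}
P = np.array([[0.6, 0.3, 0.1],         # p_ij, each row sums to 1
              [0.2, 0.5, 0.3],
              [0.1, 0.3, 0.6]])
assert np.allclose(P.sum(axis=1), 1.0)

# Stationary distribution: left eigenvector of P for eigenvalue 1.
w, V = np.linalg.eig(P.T)
pi = np.real(V[:, np.argmin(np.abs(w - 1.0))])
pi = pi / pi.sum()

rng = np.random.default_rng(seed=1)
state, counts = 0, np.zeros(len(rates))
for _ in range(100000):                # sample a long path
    state = rng.choice(len(rates), p=P[state])
    counts[state] += 1

print("stationary pi:", np.round(pi, 3))
print("empirical pi :", np.round(counts / counts.sum(), 3))
print("C = sum_i pi_i r_i =", float(pi @ rates))
\end{verbatim}
The last printed quantity anticipates the Shannon capacity formula of the next paragraph.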
This noiseless digital link corresponds to a discrete memoryless channel with Markov state available causally at both the encoder and the decoder. A channel with state is defined by a triple $(\mathcal{X}\times \mathcal{S},p(y|x,s),\mathcal{Y})$ consisting of an input set $\mathcal{X}$, a state set $\mathcal{S}$, an output set $\mathcal{Y}$, and a transition probability $p(y|x,s)$ for every $x \in \mathcal{X}$, $s \in \mathcal{S}$, and $y \in \mathcal{Y}$. The channel is memoryless if the output $y_k$ at time $k$ is conditionally independent of everything else given $(x_k,s_k)$, and the state sequence is Markov if $S_0,S_1,\dotsc$ forms a Markov chain. According to these definitions, our channel model is a discrete memoryless channel with Markov state $(\mathcal{X}\times \mathcal{S},p(y|x,s),\mathcal{Y})$, where $\mathcal{X}=\mathcal{Y}=\{1,\dotsc,2^{\bar{r}_n}\}$, $\mathcal{S} = \{\bar{r}_1,\dotsc,\bar{r}_n\}$,
\begin{align}
p(y|x,s) =
\begin{cases}
1 & \text{if } x=y \text{ and } x \le 2^{s}, \\
0 & \text{otherwise,}
\end{cases}
\end{align}
and the state transition probabilities are
\begin{align}
p(s_{k+1}= \bar{r}_j|s_k = \bar{r}_i) = p_{ij}.
\end{align}
The Shannon capacity of this channel is~\cite{GoldsmithVaraiya1996}
\begin{align}
C = \sum_{i=1}^n \pi_i \bar{r}_i,
\label{eq:shanc}
\end{align}
where $(\pi_1,\dotsc,\pi_n)$ denotes the unique stationary distribution of $\mtx{P}$. The zero-error capacity of this channel is~\cite{Shannon1956}
\begin{align}
C_{0} = \bar{r}_1.
\label{eq:shanz}
\end{align}
The capacities in~\eqref{eq:shanc} and~\eqref{eq:shanz} are the limiting values of a stability threshold function that indicates the rate-reliability constraint the channel must satisfy to achieve a given level of stabilization. As $m \rightarrow \infty$, corresponding to an increasingly stringent stability requirement, the stability threshold function tends to the zero-error capacity, which imposes the hard reliability constraint of no decoding error. Conversely, as $m \rightarrow 0$, the stability requirement weakens and the stability threshold function tends to the Shannon capacity, which only imposes the weak reliability constraint of a vanishing probability of error.
\subsection{Stability threshold function}
The system~\eqref{eq:scalsys} is $m$th moment stable if
\begin{equation}
\label{eq:sta}
\sup_k \E[|X_k|^m] < \infty.
\end{equation}
\medskip
\begin{theorem}
\label{thm1}
The system~\eqref{eq:scalsys} with bounded disturbances can be stabilized in the $m$th moment sense over the Markov channel~\eqref{eq:P} if and only if
\begin{equation}
\label{eq:Rm}
\log \la \lesssim R(m) := -\frac{1}{m}\log \rho\bigl( \mtx{P}^\tra 2^{-m \mtx{R}} \bigr),
\end{equation}
where $\rho(\cdot)$ denotes the spectral radius.
\end{theorem}
\medskip
We refer to $R(m)$ in~\eqref{eq:Rm} as the stability threshold function. Its main properties are summarized in the following proposition.
\medskip
\begin{prop}
\label{prop1}
The stability threshold function $R(m)$ satisfies the following properties:
\begin{enumerate}
\item $R(m)$ is a continuous and strictly decreasing function of $m > 0$.
\item Convergence to the Shannon capacity:
\begin{equation}
\label{eq:fact2}
\lim_{m \to 0} R(m) = \sum_{i=1}^n \pi_i \bar{r}_i = C.
\end{equation}
%where $C$ denotes the Shannon capacity of the Markov channel.
%
\item Convergence to the zero-error capacity:
\begin{equation}
\label{eq:fact3a}
R(m) \sim \bar{r}_1 - \frac{1}{m} \log p_{11}, \quad \mbox{as } m \to \infty,
\end{equation}
and hence
\begin{equation}
\label{eq:fact3b}
\lim_{m\to \infty} R(m) = \bar{r}_1 = C_0.
\end{equation}
%where $C_0$ denotes the zero-error capacity of the Markov channel.
%
\item Sensitivity with respect to self-loop probabilities: for every $i \in \{1,\dotsc,n\}$,
\begin{equation}
\label{eq:fact4}
\frac{d R(m)}{ d p_{ii}} = - \frac{2^{-m \bar{r}_{i}}}{m \ln(2)\, \rho(\mtx{P}^\tra 2^{-m\mtx{R}})} \; \frac{| \mtx{D}(i) |}{\sum_{j=1}^n |\mtx{D}(j)|} < 0,
\end{equation}
where $\mtx{D}:=\rho(\mtx{P}^\tra 2^{-m\mtx{R}}) \mtx{I} - \mtx{P}^\tra 2^{-m \mtx{R}}$, $\mtx{I}$ denotes the $n\times n$ identity matrix, and $|\mtx{D}(i)|$ is the determinant of the matrix obtained by eliminating the $i$th row and the $i$th column from $\mtx{D}$. We also have the asymptotic behavior
\begin{equation}
\label{eq:fact4b}
\frac{d R(m)}{ d p_{11}} \sim - \frac{1}{m p_{11} \ln(2)} \mbox{ as } m \to \infty.
\end{equation}
%
\item The function $mR(m)$ is nonnegative, strictly increasing, and strictly concave. If $\bar{r}_1 \neq 0$, then $mR(m)$ grows unbounded as $m\to \infty$. If instead $\bar{r}_1 =0$, then
\begin{equation}
\lim_{m \to \infty}mR(m) = - \log p_{11}.
\end{equation}
\end{enumerate}
\end{prop}
%
%\medskip
%\begin{proof}
%See Appendix~\ref{App1}.
%\end{proof}
%\medskip
%%%%%%%%%%%%%%%%%%%%%%
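For concreteness, the following minimal Python sketch (ours, not part of the paper's formal development; the channel parameters are made up) evaluates $R(m)$ directly from the definition in~\eqref{eq:Rm} and numerically checks the two limiting regimes of Proposition~\ref{prop1}.
\begin{verbatim}
# Illustrative sketch: R(m) = -(1/m) log2 rho(P^T 2^{-mR}).
# Check R(m) -> C as m -> 0 and R(m) -> r_1 = C0 as m grows.
import numpy as np

rates = np.array([1, 3, 4])            # r_1 = 1, so C0 = 1 bit
P = np.array([[0.5, 0.4, 0.1],
              [0.3, 0.4, 0.3],
              [0.2, 0.2, 0.6]])

def R(m):
    M = P.T @ np.diag(2.0 ** (-m * rates))
    rho = np.max(np.abs(np.linalg.eigvals(M)))  # spectral radius
    return -np.log2(rho) / m

w, V = np.linalg.eig(P.T)              # stationary distribution
pi = np.real(V[:, np.argmin(np.abs(w - 1.0))])
pi = pi / pi.sum()

print("C  =", float(pi @ rates), " R(1e-6) =", R(1e-6))
print("C0 =", rates[0], "          R(100)  =", R(100.0))
print("asymptote:", rates[0] - np.log2(P[0, 0]) / 100)
\end{verbatim}
For small $m$ the computed value approaches $C$, while for large $m$ it approaches $\bar{r}_1$ at the rate predicted by~\eqref{eq:fact3a}.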
\section{Anytime Capacity of Markov Channels}
We relate the stability threshold function $R(m)$ to the anytime capacity.
%$R(m)$ depends on both the system's stability level $m$ and on the properties of the channel via the transition matrix $\mtx{P}$ and the matrix of rate values $\mtx{R}$.
We show that, for the given Markov channel, $R(m)$ provides a parametric representation of the anytime capacity in terms of the system's stability level $m$.

The anytime capacity is defined in the following context~\cite{Sahai-Mitter2006}. Consider a system for information transmission that allows the decoding time to grow unbounded and improves the reliability of the estimated messages as time progresses. More precisely, at each step $k$ in the evolution of the plant a new message $m_k$ of $r$ bits is generated that must be sent over the channel. The encoder transmits over the channel at each time $k$, and upon reception of the new channel output the decoder updates its estimates of all the messages up to time $k$. It follows that at time $k$ the messages
\begin{equation}
m_0,m_1,\dotsc,m_k \nonumber
\end{equation}
are considered for estimation, while the estimates
\begin{equation}
\hat{m}_{0|k},\hat{m}_{1|k},\dotsc,\hat{m}_{k|k} \nonumber
\end{equation}
are constructed, given everything received up to time $k$. Hence, the processing of any message $m_i$ continues indefinitely for all $k \ge i$. A reliability level $\alpha$ is achieved in the given transmission system if, for all $k$ and all $d \leq k$, the probability that at least one of the first $d$ messages is incorrectly decoded at time $k$ decays exponentially at rate $\alpha$ in the decoding delay $k-d$, namely
\begin{equation}
\label{eq:relia}
\pr{ (\hat{M}_{0|k},\dotsc,\hat{M}_{d|k}) \ne (M_0,\dotsc,M_d) } = O( 2^{-\alpha (k-d) }) .
\end{equation}
The described communication system is characterized by the rate-reliability pair~$(r, \alpha)$. The work in~\cite{Sahai-Mitter2006} has shown that for scalar systems the ability to achieve stability depends on the ability to construct such a communication system, in terms of achievable coding and decoding schemes, with given rate-reliability constraints. To state this result in the context of our Markov channel, let the $\alpha$-anytime capacity $C_A(\alpha)$ be the supremum of the rates $r$ that can be achieved with reliability $\alpha$. The problems of $\alpha$-reliable communication and $m$th moment stabilization of a scalar system over a Markov channel are then equivalent in the sense of the following theorem.
\medskip
\begin{theorem}[Sahai, Mitter~\cite{Sahai-Mitter2006}]
\label{thm2}
The necessary and sufficient condition for $m$th moment stabilization of a scalar system with bounded disturbances and in the presence of channel output feedback over a Markov channel is
\begin{equation}
\log |\lambda| \lesssim C_A(m \log |\lambda|).
\label{eq:anytime1}
\end{equation}
\end{theorem}
\medskip
The anytime capacity is an intermediate notion between the zero-error capacity and the Shannon capacity. The zero-error capacity requires transmission without error. The Shannon capacity requires the decoding error to tend to zero by increasing the length of the code. In the presence of disturbances, almost sure stability of the system can be guaranteed only if the zero-error capacity exceeds a critical value~\cite{matveed-savkin2007b}. On the other hand, for scalar systems in the presence of bounded disturbances, an anytime capacity above a critical value guarantees the ability to stabilize the system in the weaker $m$th moment sense.
By combining Theorem~\ref{thm1} and Theorem~\ref{thm2}, we obtain the following result.
\medskip
\begin{theorem}
\label{th:combining}
The following holds:
\begin{enumerate}
\item Parametric characterization of the anytime capacity: For every $m >0$, the anytime capacity $C_A$ satisfies
\begin{equation}
C_A\bigl(m R(m) \bigr) = R(m),
\label{anytime2}
\end{equation}
i.e., for every $\alpha \ge 0$ in the range of the map $m \mapsto mR(m)$, there exists a unique $m(\alpha)$ such that
\begin{equation}
\label{eq:par}
m(\alpha) R\bigl( m(\alpha) \bigr) = \alpha
\end{equation}
and
\begin{equation}
\label{anytime2b}
C_A(\alpha) = R\bigl( m(\alpha) \bigr) = \frac{\alpha}{ m(\alpha)}.
\end{equation}
%
\item $C_A(\alpha)$ is a strictly decreasing function of $\alpha > 0$.
\item Convergence to the Shannon capacity:
\begin{equation}
\label{eq:fact2CA}
\lim_{\alpha \to 0} C_A(\alpha) = \sum_{i=1}^n \pi_i \bar{r}_i = C.
\end{equation}
%
\item Convergence to the zero-error capacity: If $\bar{r}_1 = 0$, then for every $\alpha \ge \log(1/p_{11})$
\begin{equation}
\label{eq:fact3a1}
C_A(\alpha) = 0 = C_0.
\end{equation}
Conversely, if $\bar{r}_1 \neq 0$, then $C_A(\alpha)$ has unbounded support and
\begin{equation}
\label{eq:fact3a2}
C_A(\alpha) \sim \bar{r}_1 \; \frac{\alpha}{\alpha- \log(1/p_{11}) } , \quad \text{ as } \alpha \to \infty,
\end{equation}
hence
\begin{equation}
\label{eq:fact3bCA}
\lim_{\alpha \to \infty} C_A( \alpha) = \bar{r}_1 = C_0.
\end{equation}
%
\end{enumerate}
\end{theorem}
\medskip
%
%%%%%%%%%%%%%%%%%%%%%%
\section{The Markov Erasure Channel}
We use the stability threshold function $R(m)$ to compute the anytime capacity of the Markov erasure channel. The Markov erasure channel corresponds to the two-state Markov process with $n=2$, $\mathcal{R}=\{0,\bar{r}\}$, $p_{12} = q$, and $p_{21}=p$, where $0 < p, q \le 1$, so that
\begin{equation}
\label{eq:bec}
\mtx{P}^\tra 2^{-m \mtx{R}} =
\begin{bmatrix}
1-q & p \, 2^{-m \bar{r}} \\[0.3em]
q & (1-p) \, 2^{-m \bar{r}}
\end{bmatrix}.
\end{equation}
\medskip
\begin{theorem}
\label{th:anyerasure}
The anytime capacity of the $\bar{r}$-bit Markov erasure channel is
\begin{equation}
\label{eq:anyMarkov}
C_A(\alpha) = \frac{\alpha \, \bar{r}}{\alpha + \log_2 \left(\frac{ 1-p - 2^{\alpha}(1-p-q)}{1-(1-q)2^{\alpha}}\right)}
\end{equation}
for $0\le \alpha < -\log_2 (1-q)$, and $C_A(\alpha)=0$ otherwise.
\end{theorem}
\medskip
In the special case $q = 1-p$ and $\bar{r}=1$, corresponding to a memoryless erasure channel with erasure probability $p$,~\eqref{eq:anyMarkov} reduces to
\begin{equation}
\label{eq:xu}
C_A(\alpha) = \frac{\alpha}{\alpha + \log_2 \left(\frac{ 1-p }{1-p2^{\alpha}}\right)},
\end{equation}
the expression obtained in explicit form by Xu and Sahai~\cite{Xu2005}. Note that the anytime capacity is zero for every $\alpha \ge -\log_2 (1-q)$ because the probability that $d$ consecutive zeros occurs is $(1-q)^d= 2^{-d (-\log_2(1-q))}$, so no reliability level larger than $-\log_2(1-q)$ is achievable.

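It is easy to verify~\eqref{eq:anyMarkov} numerically against the parametric characterization of Theorem~\ref{th:combining}: since $mR(m)$ is strictly increasing by Proposition~\ref{prop1}, $m(\alpha)$ can be found by bisection. The following Python sketch is ours and purely illustrative; the values of $p$, $q$, and $\bar{r}$ are arbitrary.
\begin{verbatim}
# Illustrative check: compute C_A(alpha) for the r-bit Markov
# erasure channel (i) by bisection on m R(m) = alpha and
# (ii) from the closed form, and confirm that the two agree.
import numpy as np

p, q, rbar = 0.2, 0.6, 2               # arbitrary parameters
P = np.array([[1 - q, q], [p, 1 - p]])  # states: rate 0, rate rbar
rates = np.array([0, rbar])

def mR(m):                             # -log2 rho(P^T 2^{-mR})
    M = P.T @ np.diag(2.0 ** (-m * rates))
    return -np.log2(np.max(np.abs(np.linalg.eigvals(M))))

def CA_parametric(alpha):
    lo, hi = 1e-9, 1e4                 # mR(m) is increasing in m
    for _ in range(200):
        mid = 0.5 * (lo + hi)
        lo, hi = (mid, hi) if mR(mid) < alpha else (lo, mid)
    return alpha / lo                  # C_A = alpha / m(alpha)

def CA_closed(alpha):                  # valid for alpha < -log2(1-q)
    num = 1 - p - 2.0 ** alpha * (1 - p - q)
    den = 1 - (1 - q) * 2.0 ** alpha
    return alpha * rbar / (alpha + np.log2(num / den))

for alpha in (0.2, 0.6, 1.0):          # here -log2(1-q) ~ 1.32
    print(alpha, CA_parametric(alpha), CA_closed(alpha))
\end{verbatim}
Both evaluations agree on the support $0 \le \alpha < -\log_2(1-q)$, consistent with item~4) of Theorem~\ref{th:combining} since $p_{11} = 1-q$ here.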
In the case $n=2$, $\bar{r}_1=0$, $\bar{r}_2=r$, with an i.i.d. rate process such that $\pr{R_k = 0}= p_1$ and $\pr{R_k = r}= p_2$ for all $k$, the stability condition becomes
\begin{align*}
\la^m \left( p_1 + p_2 2^{-mr } \right)<1,
\end{align*}
which provides a converse to the achievable scheme of Y\"{u}ksel and Meyn~\cite[Theorem 3.3]{Yuksel--Meyn2013}. If we further let $r~\to~\infty$, then the stability condition reduces to $p_1<1/\la^m$ and depends only on the erasure rate of the channel. In this case, our condition recovers the packet loss model result in~\cite{Gupta-etal-2007}.
\section{Memoryless Channels}
Consider the special case of an i.i.d. rate process $\{R_k\}$ where $R_k \sim R$ has probability mass function $p_i=\pr{R=\bar{r}_i}$, $\bar{r}_i \in \mathcal{R}$. For real $t$, let $M_R(t)= \mathbb{E}( e^{tR})$ denote the moment generating function of $R$, and let $M_R^{-1}$ denote its inverse, when it exists, i.e., $M_R^{-1}(y) = t $ if and only if $M_R(t)=y$. We have the following result.
\medskip
\begin{theorem}
\label{thm:memo}
The anytime capacity of a memoryless channel with rate distribution $R$ is
\begin{equation}
C_A(\alpha) = \frac{\ln 2^{-\alpha} }{M_R^{-1}( 2^{- \alpha})}
\label{eq:mmf3}
\end{equation}
for $\alpha < -\log p_{1}$ if $\bar{r}_1 = 0$, or for any $\alpha > 0 $ if $\bar{r}_1 \neq 0$.
\end{theorem}
\medskip
Theorem~\ref{thm:memo} shows that when the channel is memoryless, the anytime capacity can be evaluated by inverting the moment generating function of $R$, as illustrated in the next three examples.
\begin{example}
\label{ex4}
Suppose that $R$ is a binomial random variable with parameters $k$ and $1-p$. Then
\begin{equation}
M_R(t) = ( p + (1-p) e^{t})^k
\end{equation}
and
\begin{equation}
M_R^{-1}(y) = \ln \frac{y^{1/k} - p}{ 1-p }, \quad y > p^k,
\end{equation}
and thus by~\eqref{eq:mmf3}
\begin{align}
\label{eq:xu2}
C_A(\alpha) = \frac{ \alpha }{ \alpha/k + \log_2 \left(\frac{ 1-p }{1-p2^{\alpha/k}}\right) }
\end{align}
for $\alpha < - k \log p$. Notice that~\eqref{eq:xu2} recovers~\eqref{eq:xu} in the special case $k=1$, in which $R$ is Bernoulli with parameter $1-p$.
\end{example}
\begin{example}
\label{ex5}
Suppose that $R$ is a Poisson random variable with parameter $\lambda$. Then
\begin{equation}
M_R(t) = e^{\lambda (e^{t}-1)}
\end{equation}
and
\begin{equation}
M_R^{-1}(y) = \ln \left(1 + \tfrac{1}{\lambda} \ln y\right), \quad y > e^{-\lambda},
\end{equation}
and thus by~\eqref{eq:mmf3}
\begin{align}
C_A(\alpha) = -\frac{ \alpha }{ \log( 1 -\alpha \ln 2/\lambda ) }
\end{align}
for $\alpha < \lambda/\ln 2 $.
\end{example}
Notice that in both examples $\bar{r}_1=0$, so the anytime capacity has bounded support. In the next example $\bar{r}_1=1$, and thus the anytime capacity is defined for all $\alpha>0$.
\begin{example}
\label{ex3}
Suppose that $R$ is a geometric random variable with parameter $p$. Then
\begin{equation}
M_R(t) = \frac{p e^{t}}{ 1- (1-p)e^{t} }
\end{equation}
for $t < - \ln(1-p)$, and
\begin{equation}
M_R^{-1}(y) = \ln \frac{y}{ p + y(1-p) }, \quad y > 0,
\end{equation}
and thus
\begin{align}
C_A(\alpha) = \frac{\alpha }{ \log\bigl( (1-p) + p 2^{\alpha} \bigr) }, \quad \alpha > 0.
\end{align}
\end{example}
\begin{figure}
\begin{center}
\includegraphics[width=3.1in]{any_comp.eps}
\end{center}
\caption{Comparison of the anytime capacity for different memoryless channels. For the uniform distribution the plot is obtained numerically.}
\label{fig:t3}
\end{figure}
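When no closed-form inverse of $M_R$ is available, as for the uniform distribution in Fig.~\ref{fig:t3}, \eqref{eq:mmf3} can be evaluated numerically. The following Python sketch (ours, with an arbitrary parameter value; not material from the paper) inverts the moment generating function by bisection for the geometric case of Example~\ref{ex3} and compares the result with the closed form above.
\begin{verbatim}
# Illustrative sketch: C_A(a) = ln(2^-a) / M^{-1}(2^-a) for a
# geometric rate distribution, with the MGF inverted numerically,
# versus the closed form a / log2((1-p) + p 2^a).
import numpy as np

p = 0.4                                # P{R = k} = p(1-p)^{k-1}

def M(t):                              # MGF, for t < -ln(1-p)
    return p * np.exp(t) / (1 - (1 - p) * np.exp(t))

def M_inv(y):                          # bisection: M increases in t
    lo, hi = -50.0, -np.log(1 - p) - 1e-12
    for _ in range(200):
        mid = 0.5 * (lo + hi)
        lo, hi = (mid, hi) if M(mid) < y else (lo, mid)
    return lo

def CA_numeric(alpha):
    return np.log(2.0 ** (-alpha)) / M_inv(2.0 ** (-alpha))

def CA_closed(alpha):
    return alpha / np.log2((1 - p) + p * 2.0 ** alpha)

for alpha in (0.5, 1.0, 2.0, 4.0):
    print(alpha, CA_numeric(alpha), CA_closed(alpha))
\end{verbatim}
The same template applies to the Binomial and Poisson examples by replacing $M$ and its domain accordingly.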
%\section{Conclusion}
%
%{\color{red} Incomplete}
%A commonly encountered notion of stochastic stability in the MJLS literature~\cite{Costa-et-al-2004} is the one of strong moment stability, according to which the MJLS~\eqref{eq:mjls} is stable if there exists a finite $\alpha_m \in \mathbb{R}$ such that
%$ \E[ |Z|^m ] \to \alpha_m, \qquad \text{ as } m \to \infty. $
%Clearly, if a MJLS is strongly stable, then it is also weakly stable. It can be verified that the proof of the necessary condition in Theorem~\ref{lemmamjls} extends to the case of strong moment stability, thereby establishing the necessity of~\eqref{eq:pp2} for the strong moment stability of~\eqref{eq:mjls}. In contrast, the subsampling technique used in the direct part of Theorem~\ref{lemmamjls} is not suitable to prove strong moment stability because it cannot prevent undesirable fluctuations in the system dynamics between any two sampling periods.
%In~\cite{minero-etal-2013}, we presented a different proof technique based on the binomial expansion of $(a_k z_k+w_k)^m$ which can be used to prove that~\eqref{eq:pp} is a sufficient condition for the strong moment stability of~\eqref{eq:mjls}. This approach however requires $m$ to be an integer number, as opposed to the subsampling technique which can be applied to any $m>0$.
%
%\subsection{Error exponents}
%We now compare the anytime error exponent to error exponents of block codes. Specifically, we can compare the curves in Fig.~\ref{fig:scheme} obtained by evaluating~\eqref{eq:anyMarkov} with the results by Como, Y\"{u}ksel, and Tatikonda in \cite[Section VI]{Comoetal2009}. \emph{to do}
%\begin{figure}
%\begin{center}
%\includegraphics[scale=0.5]{BECMemory.eps}
%\end{center}
%\caption{Anytime reliability of a BEC with Markov erasure for different recovery probability $q$ and $p=0.8$.} \label{fig:scheme}
%\end{figure}
\bibliographystyle{IEEEtran}
\bibliography{bib_control}
%\appendices
%\section{Stability threshold function examples}
%%We now give some representative examples of the stability threshold function, visually showing its extremal properties and its relationship with the anytime capacity.
%\begin{example}
%\label{ex1}
%Let $n=4$, $\mathcal{R}=\{1,3,4,5\}$ and $\mtx{P}$ is a $4 \times 4$ circulant matrix with first row equal to $\frac{1}{16}\left(1,13,1,1\right)$, namely
%\begin{equation}
%\label{eq:exP}
%P= \frac{1}{16}
%\begin{bmatrix}
% 1 & 13 & 1 & 1 \\[0.3em]
% 13 & 1 & 1 & 1 \\[0.3em]
% 1 & 1 & 1 & 13 \\[0.3em]
% 1 & 1 & 13 & 1
% \end{bmatrix}.
%\end{equation}
%In this case it is easy to compute $C= \frac{1}{4}(1+3+4+5) = \frac{13}{4}$ and $C_0=1$. Figure~\ref{fig:t2} plots the stability threshold function $R(m)$ (together with its asymptotic approximation) and the anytime capacity $C_A(\alpha)$. Both curves have the same shape and they are in fact related by an affine transformation as $m$ grows. Furthermore, both curves have unbounded support and tend to one at infinity. There is a change of convexity for small values of $m$ and $\alpha$, as indication that $R(m)$ and $C_A(\alpha)$ are generally not convex functions. In contrast, the function $\phi(m)=m R(m)$, reported in red in the top plot of Figure~\ref{fig:t2}, is strictly convex and increasing.
%\end{example} %\begin{figure} %\begin{center} %\includegraphics[width=3.2in]{newt2.eps} %\includegraphics[width=3.2in]{c2.eps} %\end{center} %\caption{Stability threshold function and anytime capacity for Example~\verb|\ref{ex1}|} \label{fig:t2} %\end{figure} % %\begin{example} %\label{ex2} %Let $\mathcal{R}=\{0,3,4,5\}$ and $\mtx{P}$ is as in~\eqref{eq:exP}. The only difference with the previous example is that $\bar{r}_1$ is now 0 instead than 1. In this case it is easy to compute $C= \frac{1}{4}(0+3+4+5) = 3$ and $C_0=0$. Figure~\verb|\ref{fig:t1}| plots the stability threshold function $R(m)$ (together with its asymptotic approximation) and the anytime capacity $C_A(\alpha)$. In this case, while $R(m)$ has unbounded support, $C_A(\alpha)$ is zero for all $\alpha \ge - \log p_{11} = \log 16 = 4$. This occurs because the function $\phi(m)=m R(m)$ saturates as $m \rightarrow \infty$, tending to the limiting value $- \log p_{11}=4$. %\end{example} %\begin{figure} %\begin{center} %\includegraphics[width=3.2in]{newt1.eps} %\includegraphics[width=3.2in]{c1.eps} %\end{center} %\caption{Stability threshold function and anytime capacity for Example~\verb|\ref{ex2}|.} \label{fig:t1} %\end{figure} % %When viewed together, the two examples above show that for some channels a communication system with an arbitrary rate-reliability pair $(r,\alpha)$ cannot be constructed, because the anytime capacity may have bounded support and tend abruptly to zero. However, in order to achieve $m$th moment stabilization it is sufficient to consider the simpler function $R(m)=C_A(m R(m))$, and construct a communication system whose reliability level depends on the desired stabilization level. It follows that we do not need to compute the whole anytime capacity if we are interested only in moment stabilization, and we may be content with determining the threshold function $R(m)$ corresponding to its parametric representation. The extremal properties of $R(m)$ determine the support of the anytime capacity corresponding to the achievable reliability level $\alpha$. If $R(m)=O(1/m)$ then the anytime capacity has support bounded by the pre-constant of the asymptotic order. On the other hand, if $R(m)$ decreases at most sub-linearly to zero, or it tends to a constant zero-error capacity, then the anytime capacity has unbounded support and any reliability level $\alpha$ is achieved. % % % %\section{Auxiliary Results} %We first state without proof a series of auxiliary results that are needed in the rest of the paper. We begin with a maximum entropy result which generalizes the well-know result that the second moment of a continuous random variable $X$ is lower bounded by its entropy power $e^{2h(X)}$ to the case of the $L^m$-norm of a continuous random vector. % %\begin{lemma} \label{lem:vme} % Let ${X}$ be a continuous $n$-dimensional vector-valued random variable with $\mathbb{E}[||{{X}}||_m^m]0$, it follows that %\begin{align} %\label{eq:0} %\mathbb{E}(|Y|^m) & \le 2^m \mathbb{E}(| B_k|^m |Y_k |^m) + \text{const}, %\end{align} %where $\text{const}$ represents a uniform bound on the $m$th moment of $\{C_k\}_k$. Let $v_{k,i} = \mathbb{E}( |Y_{k+1}|^m 1_{\{A_{k} = \bar{a}_i}\} )$, $i=1,\dotsc,n$. 
By repeatedly applying the law of total probability on both sides of~\eqref{eq:0}, it can be easily seen that the vector $v_k=(v_{k,1},\ldots,v_{k,n})^T$ satisfies the following component-wise vector inequality %\begin{equation} %v_{k+1} \leq 2^m \bigl( \mtx{P}^\tra \mtx{A}\bigr)^\tau v_{k} + c, %\end{equation} %where $c$ is again a constant that only depends on the statistics of the disturbance process. If~\eqref{eq:pp2} holds we can choose the subsampling rate $\tau$ large enough to ensure that $\rho(2^m \bigl( \mtx{P}^\tra \mtx{A}\bigr)^\tau) <1$ and thus that the above recursion remains bounded. Since $\E(|Y_{k}|^{m}) = \sum_{i=1}^{n}v_{k,i} $ it follows that $\rho(\mtx{P}^\tra \mtx{A}) < 1$ is sufficient to ensure that $\sup_{k} \E(|Y_k|^m) < \infty$ as desired. % %The proof of the necessary condition relies on the fact that the non-homogeneous MJLS $\{Z_k\}_k$ is weakly stable only if the homogeneous MJLS obtained by setting $w_k=0$ in~\eqref{eq:mjls} is weakly stable. Therefore, we focus on the homogeneous setting wherein $w_k=0$ for all $k$ and let $v_{k,i} = \mathbb{E}( |Z_{k+1}|^m 1_{\{A_{k} = \bar{a}_i}\} )$ for $i=1,\dotsc,n$. By~\eqref{eq:mjls} and the law of total probability, the vector $v_k=(v_{k,1},\ldots,v_{k,n})^T$ evolves over time according to the linear system %\begin{equation} %v_{k+1} = \mtx{P}^\tra \mtx{A} v_{k}. %\end{equation} %Since $\E(|Z_{k}|^{m}) = \sum_{i=1}^{n}v_{k,i} $, we conclude that~\eqref{eq:weak} can hold only if the above linear system is stable and therefore only if $\rho(\mtx{P}^\tra \mtx{A}) \leq 1$, as claimed. %% %\end{IEEEproof} % %%Notice the trivial gap between the necessary and sufficient conditions in~\eqref{eq:pp} and ~\eqref{eq:pp2}. %in the case where $c \neq 0$ and $m < 2$. In this regime the EPI cannot be applied. %%To express the condition for transition to instability with a single inequality, while indicating the existence of this trivial gap, in the rest of the paper we write %%\begin{equation} %%\label{eq:sss} %%\rho(\mtx{P}^\tra \mtx{A}) \lesssim 1. %%\end{equation} %Theorem~\verb|\ref{lemmamjls}| extends the well known conditions for second moment stability given in~\verb|\cite{Costa-et-al-2004}| to $m$-th moment stability. A similar result appears in~\verb|\cite[Theorem 3.2]{fang--etal1994}| in the special case of a homogeneous MJLS driven by an i.i.d. rate process. % %%%%%%%%%%%%%%%%%%%%%% % %\section{Proofs} %\subsection{Theorem~\verb|\ref{thm1}|} % %{\em Necessity}. To establish the necessary condition, we prove that for every $k=0,1,\ldots$, %%\begin{equation} %%\E[|x_k|^2] \geq \tfrac{1}{2\pi e}\E[|z_k|^2], %%\end{equation} %\begin{equation} %\label{eq:lbstate2} %c_m \E[\vert X_k\vert^m] \ge \E[\vert Z_k\vert], %\end{equation} %where $c_m$ is a constant defined in Lemma~\verb|\ref{lem:vme}| and $\{z_k\}$ is a homogeneous MJLS with dynamics %\begin{align} %z_{k+1}=\frac{|\lambda|^m}{2^{mr_k}} z_k %\label{eq:zzzz} %\end{align} %and $z_0=e^{2h(X_0)}$. % %Let $S^k = \{S_0,\ldots,S_k \}$ denote the symbols transmitted over the digital link up to time $k$. By the law of total expectation and Lemma~\verb|\ref{lem:vme}| , %\begin{align} %\E ( |X_{k+1}|^m ) & \ge \frac{1}{c_m}~\E_{S^{k}} \bigl( e^{mh(X_{k+1}|S^{k}=a^{k}) }\bigr) %\label{eq:in_0}. %\end{align} %It follows that the $m$-th moment of the state is lower bounded by the average entropy power of $X_k$ conditional on $S^{k}$. 
From the translation invariance property of the differential entropy, the conditional version of entropy power inequality~\verb|\cite{El-Gamal--Kim2011}|, and Assumption A2, it follows that %\begin{align} %& \E_{S^{k}} \bigl( e^{ m h(X_{k+1} |S^{k}=s^{k}) }\bigr)\nonumber \\ %% %& = \E_{S^{k}} \bigl( e^{m h(\lambda X_{k} + \hat{x}(s^{k}) + V_k |S^{k}=s^{k}) }\bigr) \nonumber \\ %% %% %& \ge \E_{S^{k}} \left( \bigl( e^{2 h(\lambda X_{k} |S^{k}=s^{k}) } + e^{2 h(V_k)} \; \bigr)^{\frac{m}{2}} \right) \nonumber \\ %% %& \ge \E_{S^{k}} \left( e^{m h(\lambda X_{k} |S^{k}=s^{k}) } \right) \nonumber \\ %% %& = |\lambda|^m \E_{S^{k}} \bigl( e^{mh(X_{k} |S^{k}=s^{k}) } \bigr) \label{eq:ineq1} %\end{align} %We can further lower bound~\eqref{eq:ineq1} as in Lemma~1 in~\verb|\cite{Minero-etal-2009}|, and obtain that for every $k \geq 0$ %\begin{equation} %\label{lem_min} %\E_{S^k|S^{k-1},R_k} \bigl( e^{ m h(X_k|S^{k}=s^{k})} \bigr) \geq \frac{1}{2^{mR_k}}e^{ m h(X_k|S^{k-1}=s^{k-1})}, %\end{equation} %with $S_{-1}=\emptyset$. By the tower rule of conditional expectation, it then follows that %\begin{align} %& \E_{S^{k}} \bigl( e^{m h(X_{k} |S^{k}=s^{k}) } \bigr) \geq \nonumber \\ %& \qquad \E_{S^{k-1},R_k} \left( \frac{1}{2^{m R_k } } e^{m h(X_k|S^{k-1}=s^{k-1}) } \right). \label{eq:ineq2} %\end{align} %Combining~\eqref{eq:ineq2} and~\eqref{eq:ineq1} gives %% %\begin{align} %& \E_{S^{k}} \left( e^{ m h(X_{k+1} |S^{k}=s^{k}) }\right) \nonumber \\ %%& = |\lambda|^2 \E_{S^{k-1},R_k} \bigl( \E_{S^{k}|S^{k-1},R_k} \bigl( e^{2h(x_{k} |S^{k}=s^{k}) } \bigr) \bigr) + \beta \nonumber \\ %% %%& \ge |\lambda|^2 \E_{S^{k-1},R_k} \left( \frac{1}{2^{2 R_k} } e^{2h(x_{k} |S^{k-1}=s^{k-1}) } \right) + \beta \nonumber \\ %% %& \geq \E_{R_k} \left( \frac{ |\lambda|^m }{2^{m R_k} } \E_{S^{k-1}|R_k} \bigl( e^{m h(X_{k} |S^{k-1}=s^{k-1}) } \bigr) \right). \label{eq:in_1} %\end{align} %% %Following similar steps and using the Markov chain $S^{k-1}\to ( S^{k-2},R_{k-1}) \to R_k$, we obtain %\begin{align} %& \E_{S^{k-1}|R_k} \bigl( e^{m h(X_{k} |S^{k-1}=s^{k-1}) } \bigr) \nonumber \\ %% %& \geq |\lambda|^m \E_{S^{k-1}|R_k} \bigl( e^{m h(X_{k-1} |S^{k-1}=s^{k-1}) } \bigr) \nonumber \\ %% %%& = |\lambda|^2 \E_{S^{k-2},R_{k-1}|R_k} \left( \E_{S^{k-1}|S^{k-2},R_{k-1}} \bigl[ e^{2h(x_{k-1} |S^{k-1}=s^{k-1}) } \bigr) \right) + \beta \nonumber \\ %% %& \ge \E_{S^{k-2},R_{k-1}|R_k} \left( \frac{|\lambda|^m}{2^{m R_{k-1}}} e^{m h(X_{k-1} |S^{k-2}=s^{k-2}) } \right) \nonumber \\ %% %& = \E_{R_{k-1}|R_k} \left( \frac{|\lambda|^m}{2^{2R_{k-1}}} \E_{S^{k-2}|R_{k-1},R_k} \bigl[ e^{2h(X_{k-1} |S^{k-2}=s^{k-2}) } \bigr] \right). \label{eq:in_2} %% %\end{align} %% %Substituting~\eqref{eq:in_2} into~\eqref{eq:in_1} and re-iterating $k$ times, it follows that $ \E_{S^{k}} \bigl( e^{ m h(X_{k+1} |S^{k}=s^{k}) }\bigr)$ is lower bounded by %\begin{align} %& \E_{R_{k-1},R_k} \Bigl( \frac{|\lambda|^{2m}}{2^{m(R_{k-1} + R_k )}} \nonumber \\ %& \qquad \qquad \times \E_{S^{k-2}|R_{k-1},R_k} \Bigl( e^{m h(X_{k-1} |S^{k-2}=s^{k-2}) } \Bigr) \Bigr) \nonumber \\ %% %& \geq \E_{R_{1},\dotsc,R_k} \left( \frac{|\lambda|^{mk} }{2^{m(R_{1} +\cdots + R_k )}} \E_{S_1 |R_{1},\dotsc,R_k} \Bigl( e^{m h(X_{1} |S_0=s_0 ) } \Bigr) \right) \label{eq:in_3} \\ %% %& = \E \left( \tfrac{|\lambda|^{m(k+1)} }{2^{m(R_{1} +\cdots + R_k )}} \right) e^{mh(X_{0})} \label{eq:in_4}, %% %\end{align} %where~\eqref{eq:in_3} uses the fact that the initial condition of the state $x_0$ is independent of the rate process $R_k$. 
Let $\{Z_k\}$ be a non-homogeneous MJLS with dynamics %\begin{align} %Z_{k+1}=|\lambda|^m/2^{mR_k} Z_k, %\label{eq:nec_mjls22} %\end{align} %with $z_0=e^{mh(X_0)}$. By taking the expectation on both sides of~\eqref{eq:nec_mjls22} and iterating $k$ times, it is easy to see that the right hand side of~\eqref{eq:in_4} is equal to the first moment of $Z_{k+1}$. Hence, combining~\eqref{eq:in_0}--\eqref{eq:in_4} we conclude that $\E(| X_k |^m) > \frac{1}{c_m}\E ( |Z_k| )$, which is the claim. % %{\em Sufficiency}. Let $\omega$ denote a uniform bound on the measure of the support of the initial condition, the noise, and the disturbance. Then, we claim that the plant dynamics can be bounded as follows %\begin{equation} %\label{eq:in_} %|x_k| \le z_k, \quad k=0,1,\ldots, %\end{equation} %where $Z_k$ is a homogeneous MJLS with dynamics %$$z_{k+1} = \frac{|\lambda|}{2^{R_k}}z_k + \omega, \quad k=0,1,\ldots$$ %and $z_0 = \omega$. To see this, consider the following inductive proof. By assumption, $|x_0|\leq \omega = z_0$. Assume that the claim holds for all times up to $k$, so $|x_{k}|\le z_{k}$. Suppose that at time $k$ the uncertainty set $[-z_k,z_k]$ is quantized using a $r_k$-bit uniform quantizer, and that the encoder communicates to the decoder the interval containing the state. Then, the decoder approximates the state by the centroid $\hat{x}_k$ of this interval and sends to the plant the control input $u_k = -\lambda \hat{x}_k$. By construction $|x_k-\hat{x}_k| \le z_k/2^{r_k}$, thus %\begin{align} %|x_{k+1}| & = |\lambda (x_k-\hat{x}_k) + v_k | \nonumber \\ %& \le |\lambda| |x_k-\hat{x}_k| + \omega \nonumber \\ %& \le \frac{|\lambda|}{2^{r_k}}z_k + \omega \nonumber \\ %& = z_{k+1} %\end{align} %i.e., the claim holds at time $k+1$ as well. It follows that $x_k$ is $m$th moment stable if the homogeneous MJLS $z_k$ is weakly $m$th moment stable, i.e., if and only if~\eqref{eq:Rm} holds. % % %\subsection{Proposition~\verb|\ref{prop1}|} %\label{App1} % %The monotonicity property is an immediate consequence of the log-convexity of the spectral radius of a nonnegative matrix, see, e.g.~\verb|\ref{thm:Friedland}|. Let $ \mtx{D}_1 = - m \mtx{R} \ln 2$, $ \mtx{D}_2 = \mtx{0}_{n\times n}$ and $\alpha = \frac{n}{m}$. Notice that $ 2^{-m \mtx{R}} = e^{ \mtx{D}_1}$, $\log \rho( \mtx{P}^\tra e^{ \mtx{D}_2} ) = 0$, and $\mtx{P}^\tra 2^{-m \mtx{R}} = \mtx{P}^\tra e^{\alpha \mtx{D}_1}$. Then, for every $n < m$, by Lemma~\verb|\ref{thm:Friedland}| %\begin{align} %-n R(m) & = \frac{n}{m} \log \rho( \mtx{P}^\tra 2^{-m \mtx{R}} ) \nonumber \\ %& = \alpha \log \rho( \mtx{P}^\tra e^{\mtx{D}_1} ) + (1-\alpha) \log \rho( \mtx{P}^\tra e^{\mtx{D}_2} ) \nonumber \\ %% %& > \log \rho( \mtx{P}^\tra e^{\alpha \mtx{D}_1 + (1-\alpha) \mtx{D}_2} ) \nonumber \\ %& = \log \rho( \mtx{P}^\tra 2^{-n\mtx{R}} ) \nonumber \\ %& = -n R(n). %\end{align} %By dividing both sides by $-n$, it then follows that $R(n) > R(m)$, establishing that $R(m)$ is a strictly decreasing function of $m$. 
% %To establish the convergence of $R(m)$ to the Shannon capacity as $m \to 0$, observe that %\begin{align*} %& \lim_{m \to 0} R(m) \\ %& = \lim_{m \to 0} \log \rho\bigl( (\mtx{P}^\tra 2^{-m\mtx{R}})^{\frac{1}{m}} \bigr) \\ %% %& = \log \rho\bigl( \lim_{m \to 0}(\mtx{P}^\tra 2^{-m\mtx{R}})^{\frac{1}{m}} \bigr) \\ %% %& \stackrel{(a)}{=} \log \rho \left( \lim_{m \to 0} \left( \begin{bmatrix} % \pi_1 2^{m \bar{r}_1} & \cdots & \pi_1 2^{m \bar{r}_n} \\ % \vdots & \vdots & \vdots \\ % \pi_n 2^{m \bar{r}_1} & \cdots & \pi_n 2^{m \bar{r}_n} % \end{bmatrix} %\right)^{\frac{1}{m}} \right) \\ %% %& = \lim_{m \to 0} \frac{1}{m}\log \rho \left( \begin{bmatrix} % \pi_1 2^{m \bar{r}_1} & \cdots & \pi_1 2^{m \bar{r}_n} \\ % \vdots & \vdots & \vdots \\ % \pi_n 2^{m \bar{r}_1} & \cdots & \pi_n 2^{m \bar{r}_n} % \end{bmatrix} \right) \\ %% %& = \lim_{m \to 0} \frac{1}{m}\log \left( \sum_{i} \pi_i 2^{m\bar{r}_i} \right) %\\ %% %& \stackrel{(b)}{=} \lim_{m \to 0} \frac{ \sum_{i} \pi_i r _i 2^{m\bar{r}_i} }{ \sum_{i} \pi_i 2^{m\bar{r}_i} } %\\ %& = \sum_{i} \pi_i 2^{m\bar{r}_i} %\end{align*} %where (a) follows from Lemma~\verb|\ref{eq:lim1}| and (b) follows from l'H\^opital's rule. % %The convergence of $R(m)$ to the zero-error capacity as $m \to \infty$ can be proved using the fact that, as $m \to \infty$ %\begin{align*} %R(m) & = -\frac{1}{m}\log \rho (\mtx{P}^\tra 2^{-m \mtx{R}}) \\ %& = - \log \rho \Bigl( (\mtx{P}^\tra 2^{-m \mtx{R}})^{1/m} \Bigr)\\ %& = \bar{r}_1- \log \rho \left( \begin{bmatrix} % p_{11} & \cdots & p_{n1} 2^{-m(\bar{r}_n-\bar{r}_1)} \\ % \vdots & \vdots & \vdots \\ % p_{1n} & \cdots & p_{nn} 2^{-m(\bar{r}_n-\bar{r}_1)} % \end{bmatrix}^{1/m} \right) \\ %& \sim \bar{r}_1- \frac{1}{m} \log p_{11}, %\end{align*} %where the last equation follows from the fact that for any column vector $v=(v_1,\dotsc,v_n)^\tra$, $\rho(\begin{bmatrix} v & \mtx{0}_{n \times (n-1)} \end{bmatrix}) = v_1$. From the above asymptotic approximation it immediately follows that if $\bar{r}_1 =0$, then %\begin{equation} %\lim_{m \to \infty}mR(m) = - \log p_{11}. %\end{equation} % %Next, the property on the sensitivity with respect to self-loop probabilities is a direct application of a result in~\verb|\cite{Cohen1978}|, which is re-stated here as Lemma~\verb|\ref{lem:cohen}|, on the derivative of the spectral radius as a function of non-negative matrix elements. % %Next, the monotonicity property of $-m R(m) = \log \rho( \mtx{P}^\tra 2^{-m \mtx{R}} )$ %follows from the fact that all the entries of the matrix $\mtx{P}^\tra 2^{-m \mtx{R}}$ are monotonically decreasing in $m$. Finally, the strict concavity of $m R(m)$ is again a direct consequence of the log-convexity of the spectral radius of a nonnegative matrix stated in Lemma~\verb|\ref{thm:Friedland}|. % %%%% % %\subsection{Theorem~\verb|\ref{th:combining}|} %\begin{IEEEproof} %By Theorem~\verb|\ref{thm1}| and Theorem~\verb|\ref{thm2}|, at the boundaries of the stability region we must have that %\begin{equation} %\label{eq:any1} %\log |\lambda| = R(m). %\end{equation} %and %\begin{equation} %\label{eq:any2} %\log |\lambda| = C(m \log |\lambda|). %\end{equation} %Therefore,~\eqref{anytime2} follows by combining~\eqref{eq:any1} and~\eqref{eq:any2}. Next, by Proposition~\verb|\ref{prop1}|, the function $\phi(m)=mR(m)$ is increasing and strictly concave, thus invertible. It follows that for every $\alpha \ge 0$, there exists a unique $m := m(\alpha)$ such that %\begin{equation} %m R(m) = \alpha. 
%\end{equation} %Substituting this equality into~\eqref{anytime2}, it follows that $C_A(\alpha) = R\bigl( m(\alpha) \bigr).$ as claimed. The remaining properties are immediate consequences of Proposition~\verb|\ref{prop1}|. By definition, $C_A(\alpha)$ is non increasing in $\alpha$. Since both $mR(m)$ and $R(m)$ are both monotonic function of $m$, however, it follows that $C_A(\alpha)$ must be strictly decreasing in $\alpha$, thus establishing~2). Property~3) follows from~\eqref{anytime2} combined with the fact that $mR(m) \to 0 $ and $R(m) \to C$ as $m \to 0$. Similarly, property~4) follow directly by combining~\eqref{anytime2} with properties~3) and~5) in Proposition~\verb|\ref{prop1}|. %\end{IEEEproof} % %\subsection{Theorem~\verb|\ref{th:anyerasure}|} %\medskip %\begin{IEEEproof} %By~\eqref{eq:par} and the definition of $R(m)$, for every $\alpha$ there exists an $m(\alpha)$ such that %\begin{equation} %\label{eq:bec2a} %\rho(\mtx{P}^\tra 2^{-m(\alpha) \mtx{R}}) = 2^{-\alpha} %\end{equation} %In the case of a Markov erasure channel, a simple calculation shows that %\begin{equation} %\label{eq:bec2} %\rho(\mtx{P}^\tra 2^{-m \mtx{R}})=\frac{\text{tr}}{2}+\frac{1}{2}\sqrt{\text{tr}^2-4\text{det}}, %\end{equation} %where $\text{tr}$ and $\text{det}$ denote the trace and determinant of~\eqref{eq:bec}, respectively. By combining~\eqref{eq:bec2a} and~\eqref{eq:bec2} and squaring both sides of the resulting equation, it follows that $m(\alpha)$ must satisfy %\begin{equation} %\label{eq:bec3} %2^{-\alpha} \text{tr} - 4 \text{det} = 2^{-\alpha+1}, %\end{equation} %where $\text{tr} = (1-q) +2^{-m(\alpha) \bar{r}}(1-p)$ and $\text{det} = 2^{-m(\alpha) \bar{r}} (1-p-q)$. Solving~\eqref{eq:bec3} yields %\begin{equation} %\label{bec4} %m(\alpha) = \frac{1}{ \bar{r}} \left( \alpha + \log_2 \left(\frac{ 1-p - 2^{\alpha}(1-p-q)}{1-(1-q)2^{\alpha}}\right) \right) %\end{equation} %for $0\le \alpha < -\log_2 (1-q) $. Finally, substituting~\eqref{bec4} into~\eqref{anytime2b} yields~\eqref{eq:anyMarkov}. %\end{IEEEproof} % % %\subsection{Theorem~\verb|\ref{thm:memo}|} %\begin{IEEEproof} %In the special case of an i.i.d. rate process, %\begin{equation*} %\mtx{P}^\tra 2^{-m \mtx{R}} =\bigl(p_1,\ldots,p_n \bigr)^\mathrm{T} \bigl(2^{-m \bar{r}_1},\dotsc,2^{-m \bar{r}_n}\bigr) %\end{equation*} %is a rank-one matrix whose only nonzero eigenvalue is $\sum_{i=1}^n p_i 2^{-m \bar{r}_i} = \mathbb{E}( 2^{-mR})$. Therefore, in this case %\begin{align} %R(m) & = -\frac{1}{m}\log \mathbb{E}( 2^{-mR}) \nonumber \\ %& = -\frac{1}{m}\log M_R( -m \ln 2 ). %\label{eq:mmf} %\end{align} %By combining~\eqref{eq:par} and~\eqref{eq:mmf}, we find that $\alpha = -\log M_R( -m(\alpha) \ln 2 )$ and so %\begin{align} %m(\alpha) = -\frac{1}{\ln 2 } M_R^{-1}( 2^{- \alpha}), %\label{eq:mmf2} %\end{align} %where $M_R^{-1}(y)$ exists in light of property 5) in Proposition~\verb|\ref{prop1}|. Thus, by~\eqref{anytime2b},~\eqref{eq:mmf3} follows. %\end{IEEEproof} \end{document}