% Paolo added file anytime.tex  almost 9 years ago
%
% Commit id: 3b8a74fe8ad6bbad8c4c7c26cfb54aacb8812ae6
%
% deletions | additions

\documentclass[12pt,draftcls,onecolumn]{IEEEtran}  \usepackage{stfloats}  \usepackage{cite}  \usepackage{epsf}  %\usepackage[final]{ieeefig}  \usepackage{amsmath,amsthm,amssymb,amsfonts}  \interdisplaylinepenalty=2500  \usepackage{color}  \usepackage{float}  \usepackage{subfigure}  \usepackage{fancyhdr}  \usepackage{verbatim}  \usepackage{epsf}  \usepackage{hyperref}  \usepackage{moreverb}  \usepackage{pifont}  \usepackage{setspace,supertabular}  \usepackage{psfrag}  \usepackage{epsfig,graphicx,subfigure,epstopdf}  \newtheorem{theorem}{Theorem}  \newtheorem{lemma}{Lemma}  \newtheorem{prop}[theorem]{Proposition}  \newtheorem{claim}[theorem]{Claim}  \newtheorem{cor}[theorem]{Corollary}  \newtheorem{defn}{Definition}[section]  \newtheorem{example}{Example}[section]  \newtheorem{obs}{Observation}  \newtheorem{remark}{Remark}  \newcommand{\e}[2]{e^{(#1)}_{#2}}  \newcommand{\z}[2]{z^{(#1)}_{#2}}  \newcommand{\un}[1]{\underline{#1}}  \newcommand{\bb}[1]{\mathbb{#1}}  \newcommand{\one}[1]{1_{\{#1\}}}  \newcommand{\mysum}[2]{\sum_{#1}^{#2}}  \newcommand{\myprod}[2]{\prod_{#1}^{#2}}  \newcommand{\ex}[1]{\E\left[#1\right]}  \newcommand{\condex}[2]{\E\left[#1|#2\right]}  \newcommand{\exwrt}[2]{\E_{#1}\left[#2\right]}  \newcommand{\pr}[1]{ {\sf P}\{#1\}}  \newcommand{\cpr}[2]{\text{Pr}\{#1|#2\}}  \newcommand{\la}{|\lambda |}  \newcommand{\A}{\mathcal{A}}  \newcommand{\C}{\mathcal{C}}  \newcommand{\U}{\mathcal{U}}  \newcommand{\V}{\mathcal{V}}  \newcommand{\hx}{\hat{x}}  \newcommand{\bx}{\bar{x}}  \DeclareMathOperator\E{\sf E}  \newcommand{\ab}{{\boldsymbol \alpha}}  \newcommand{\R}{\mathbb{R}}  \newcommand{\tra}{\mathsf{T}}  \newcommand{\lore}[1]{\textcolor{blue}{\emph{Lorenzo: }#1}}  \newcommand{\paolo}[1]{\textcolor{green}{\emph{Paolo: }#1}}  \newcommand{\massi}[1]{\textcolor{red}{\emph{Massimo: }#1}}  \graphicspath{{figs/}}  \allowdisplaybreaks  \begin{document}  \IEEEpeerreviewmaketitle  \date{}  \title{The Anytime Capacity \\ of Markov Channels}  \maketitle  
\section{Introduction}  Some noteworthy references on the anytime capacity~\cite{Forney1974, Schulman1996, Sahai2001, Sahai-Mitter2006,Como-Fagnani-Zampieri2010, simsek-jain-varaija2004,Ostrovsky-etal-2009, Sukhavasi-Hassibi2011a}. The usual references on network-theoretic approach~\cite{Gupta-etal-2007,Gupta-etal-2009,Schenato-etal-2007,Sinopoli-etal-2004,Minero-etal-2009,Huang-Dey-2007}; the usual references on data rate theorem~\cite{Wong-Brocket1999,Brocket-Liberzon2000,Liberzon2003,Braslavsky-Middleton-Freudenberg2007,Delchamps1990,Martins-Dehleh-Elia2006,Nair-Evans-2004,Imer-etal-2006,Tatikonda-Mitter2004,Tatikonda-Mitter2004b,Wong-Brocket1999,You-Xie2011,You-Xie2010,Yuksel2010,Yuksel-Basar2011}; some references on stabilization over AWGN channels~\cite{Braslavsky-Middleton-Freudenberg2007,Middleton-Roja2009,Freudenberg-Middleton-Solo2010}; some references on MJLS~\cite{Seiler-Sengupta2001,Kawka-Alleyne-2006,Liu-Elia-Tationda2004,Sahai-etal-2005}  \section{Markov Jump Linear Systems}  Consider the scalar non-homogeneous MJLS~\cite{Costa-et-al-2004} with dynamics  \begin{equation}  \label{eq:mjls}  z_{k+1}=\frac{\lambda}{2^{R_k}}z_k+w_k,  \end{equation}  where $z_k\in\bb{R}$, the initial condition $z_0$ has bounded entropy, $c\ge0$ is a constant, and $\{R_k\}_{k\geq 0}$ is a Markov rate process defined on   \begin{align}  \label{eq:support}  \mathcal{R} = \{\bar{r}_1,\ldots,\bar{r}_n\},  \end{align}  for some integer numbers $0 \le \bar{r}_1 < \dots < \bar{r}_n$, and with one-step transition probability matrix $P$ having entries  \begin{align}  \label{eq:P}  p_{ij} = \pr{R_k = \bar{r}_j| R_{k-1} = \bar{r}_i}  \end{align}  for every $i,j \in \{1,\dotsc,n\}$.  The system~\eqref{eq:mjls} is said to be weakly $m$th moment stable  if   \begin{equation}  \label{eq:weak}  \sup_{k} \E( |Z_k|^m )< \infty.   
\end{equation}  Let $R \in \mathbb{Z}_+^{n\times n}$ be a diagonal matrix with diagonal entries $ \bar{r}_1 , \dots , \bar{r}_n$, i.e.,   \begin{align}  \label{eq:Rdiag}  R = \text{diag}(\bar{r}_1 , \dots , \bar{r}_n).  \end{align}  %Let $P^\tra 2^{-mR}$, $m=1,2$, be the $n \times n$ matrix with nonnegative real elements  %\begin{equation}  %\label{eq:h1}  %h_{ij}^{(m)}=\frac{1}{2^{ m \; \bar{r}_j}} p_{ji},  %\end{equation}  %for all $i,j\in\{1,\ldots,n\}$.   The following lemma states the necessary and sufficient condition for $m$-th moment stability of the system~\eqref{eq:mjls} in terms of the unstable mode $\la$ and the spectral radius $\rho(\cdot)$ of $P^\tra 2^{-mR}$, where $2^{-mR}$ denotes the base-2 matrix exponential of $mR$, i.e.,   \begin{align}  2^{-mR} = \text{diag}( 2^{-m\bar{r}_1},\cdots, 2^{-m\bar{r}_n}).  \end{align}  The spectral radius of a matrix is the maximum among the absolute values of its eigenvalues.   \medskip  \begin{theorem}  \label{lemmamjls}  For any $m \in \mathbb{R}^+$, if  \begin{equation}  \label{eq:pp}  \la^m<\frac{1}{\rho(P^\tra 2^{-mR})},  \end{equation}  then the MJLS~\eqref{eq:mjls} is weakly $m$th moment stable. In the special case $c=0$, the inequality ``$<$'' in~\eqref{eq:pp} is replaced by ``$\le$''. Conversely, if MJLS~\eqref{eq:mjls} is weakly $m$th moment stable, then   \begin{equation}  \label{eq:pp2}  \la^m \leq \frac{1}{\rho(P^\tra 2^{-mR})}   \end{equation}  In addition, for every $m \ge 2$ the inequality ``$\le$'' in~\eqref{eq:pp2} is replaced by ``$<$''.   \end{theorem}  \medskip  \begin{proof} {[TO WRITE]}  The proof of the sufficient condition is new, unpublished, and based on the idea of subsampling the original MJLS and of proving the weak stability of the subsampled system. The proof of the necessary condition with ``$\le$'' sign is standard, while the proof for $m \ge 2$ is based on the maximum entropy theorem and the EPI, as presented at CDC 2013~\cite{minero-etal-2013}.  
\end{proof}  \medskip  Notice the trivial gap between the necessary and sufficient conditions in~\eqref{eq:pp} and~\eqref{eq:pp2} in the case where $c \neq 0$ and $0 \le m < 2$. In this regime, the EPI cannot be applied.   Theorem~\ref{lemmamjls} extends the well-known conditions for second moment stability~\cite{Costa-et-al-2004} to $m$-th moment stability. A similar result appears in~\cite[Theorem 3.2]{fang--etal1994} in the special case of a homogeneous MJLS driven by an i.i.d. rate process.   %%%%%%%%%%%%%%%%%%%%%%  \section{Moment Stabilization of Scalar Systems over Markov Channels}  \label{sec:scalar}  %\subsection{Main result}  %\label{sec:scalarmain}  Consider now the scalar dynamical system  \begin{align}  \label{eq:scalsys}  x_{k+1}=\lambda x_k+u_k+v_k,\quad y_k=x_k+w_k,\quad k=0,1,\ldots,  \end{align}  where $\la \geq 1$, $x_k$ represents the state variable of the system, $u_k$ the control input, $v_k$ an additive disturbance independent of the initial condition $x_0$, $y_k$ the sensor measurement, and $w_k$ the measurement noise, which is assumed to be independent of all other random variables.  \subsection{Feedback Channel Model}  The state observer is connected to the actuator through a noiseless digital communication link that at each time $k$ allows transmission without errors of $R_k$ bits. The rate process $\{R_k\}_{k\geq 0}$ is a homogeneous positive-recurrent Markov chain that takes values on~\eqref{eq:support} and has transition probability $P$.  %In the information theory literature, a communication channel is defined by a triple $(\mathcal{X},p(y|x),\mathcal{Y})$ that consists of an input set $\mathcal{X}$, an output set $\mathcal{Y}$ and a transition probability matrix $p(y|x)$ for every $x \in \mathcal{X}$ and $y \in \mathcal{Y}$. A communication channel with state sequence $\{s_k, k \ge 0\}$ is defined as a triple $(\mathcal{X}\times \mathcal{S},p(y|x,s),\mathcal{Y})$ with finite state set $\mathcal{S}$.   
The noiseless digital link can be regarded as a discrete-memoryless channel with Markov state available causally at both the encoder and the decoder. Formally, a channel with state is defined by a triple $(\mathcal{X}\times \mathcal{S},p(y|x,s),\mathcal{Y})$  consisting of an input set $\mathcal{X}$, a state set $\mathcal{S}$, an output set $\mathcal{Y}$, and a transition probability matrix $p(y|x,s)$ for every $x \in \mathcal{X}$, $s \in \mathcal{S}$, and $y \in \mathcal{Y}$. A channel with state is said to be memoryless if the output $y_k$ at time $k$ is conditionally independent of everything else given $(x_k,s_k)$. The state sequence is Markov if $S_0,S_1,\dotsc$ forms a Markov chain.   The channel model studied in this paper is a discrete-memoryless channel with Markov state $(\mathcal{X}\times \mathcal{S},p(y|x,s),\mathcal{Y})$ with $\mathcal{X}=\mathcal{Y}=\{1,\dotsc,\bar{r}_n\}$, $\mathcal{S} = \{\bar{r}_1,\dotsc,\bar{r}_n\}$,   \begin{align}  p(y|x,s) = \begin{cases}  1 & x=y \text{ and } x \le s \\  0 & \text{otherwise},  \end{cases}  \end{align}  and state transition probabilities  \begin{align}  p(s_{k+1}= \bar{r}_j|s_k = \bar{r}_i) = p_{ij}.  \end{align}  The Shannon capacity of a general discrete-memoryless channel with Markov state was characterized in~[CITE]. In our specific setup,   \begin{align}  C_{\text{Shannon}} = \sum_{i=1}^n \pi_i \bar{r}_i,  \end{align}  where $(\pi_1,\dotsc,\pi_n)$ denotes the unique stationary distribution of $P$. The zero-error capacity of this channel~[CITE] is given by  \begin{align}  C_{ZE} = \bar{r}_1.  \end{align}  \subsection{Moment Stability}  The system~\eqref{eq:scalsys} is said to be $m$-th moment stable if   \begin{equation}  \label{eq:sta}  \sup_k \E[|X_k|^m]<\infty,  \end{equation}  where the expectation is taken with respect to the random initial condition, the additive disturbance $v_k$, and the rate process $R_k$. We make the usual assumptions on the tail distribution of the disturbance.  
The following result establishes the equivalence between the $m$-th moment stability of~\eqref{eq:scalsys} and the weak moment stability of~\eqref{eq:mjls}.   \medskip  \begin{theorem}  \label{thm1}  There exists a control scheme that stabilizes the scalar system~\eqref{eq:scalsys} in the $m$th moment sense if and only if the MJLS~\eqref{eq:mjls} is weakly $m$th moment stable, i.e., if and only if  \begin{equation}  \log |\lambda | \lesssim -\frac{1}{m}\log \rho(P^\tra 2^{-mR}) \triangleq R(m).  \label{eq:Rm}  \end{equation}  \end{theorem}  %  \medskip  \begin{proof}{[TO WRITE]}  The proof is pretty much as it was sketched in the CDC 2013 paper~\cite{minero-etal-2013}.  The proof technique depends as usual on assumptions made on the noise distribution (bounded or unbounded). Since the anytime capacity result only applies to plants with bounded noise, I suggest writing the proof in this setup and mention that it can be extended to the usual case of unbounded noise.   \end{proof}  \medskip  \begin{figure}[H]  \begin{center}  \includegraphics[scale=0.75]{Rm.eps}  \end{center}  \caption{The function $R(m)$ in the special case where $P$ is given in~\eqref{eq:exP} and $n =4$. The asymptotic expression is given in~\eqref{eq:fact3a}.}   \label{fig:Rm}  \end{figure}  \medskip  \begin{prop}  \label{prop1}  The following holds:  \begin{enumerate}  \item $R(m)$ is a strictly decreasing function of $m > 0$.  \item Convergence to the Shannon capacity:  \begin{equation}  \label{eq:fact2}  \lim_{m \to 0} R(m) = \sum_{i=1}^n \pi_i \bar{r}_i = C_{\text{Shannon}},  \end{equation}  where $C_{\text{Shannon}}$ denotes the Shannon capacity of the Markov channel.  %  \item Convergence to the Zero Error capacity:  \begin{equation}  \label{eq:fact3a}  R(m) \sim \bar{r}_1 - \frac{1}{m} \log p_{11}, \quad m \to \infty,  \end{equation}  hence  \begin{equation}  \label{eq:fact3b}  \lim_{m\to \infty} R(m) = \bar{r}_1 = C_{ZE},  \end{equation}  where $C_{ZE}$ denotes the zero-error capacity of the Markov channel.  
%  \item Sensitivity with respect to self-loop probabilities  \begin{equation}  \label{eq:fact4}  \frac{d R(m)}{ d p_{ii}} = - \frac{2^{-m \bar{r}_{i}}}{m \rho(P^\tra 2^{-mR})} \; \frac{| D(i) |}{\sum_{j=1}^n |D(j)|} < 0,  \end{equation}  where $D:=\rho(P^\tra 2^{-mR}) I - P^\tra 2^{-mR}$, where $I$ denotes the $n\times n$ identity matrix, and $|D(i)|$ is the determinant of the matrix obtained by eliminating the $i$th row and the $i$th column from $D$. In particular, as $m \to \infty$  \begin{equation}  \label{eq:fact4b}  \frac{d R(m)}{ d p_{11}} \sim - \frac{1}{m p_{11} \ln(2)}.  \end{equation}  %  \item The function $mR(m)$ is nonnegative, strictly increasing, and strictly concave. If $\bar{r}_1 =0$, \[\lim_{m \to \infty}mR(m) = - \log p_{11}.\]   \end{enumerate}  \end{prop}  %  \medskip  \begin{proof}  See Appendix~\ref{App1}.  \end{proof}  \medskip  %%%%%%%%%%%%%%%%%%%%%%  \section{Anytime Capacity of Markov Channels}  [We must modify this part, which is copied from the book chapter for Cuomo] Consider a system for information transmission that allows the time for processing the received codeword at the decoder to be infinite, and improves the reliability as time progresses. More precisely, at each step $k$ in the evolution of the plant a new message $m_k$ of $r$ bits is generated that must be sent over the channel. The coder sends a bit over the channel at each $k$ and the decoder upon reception of the new bit updates the estimates for all messages up to time $k$. It follows that at time $k$ messages  \begin{equation}  m_0,m_1,\dotsc,m_k \nonumber  \end{equation}  are considered for estimation, while estimates  \begin{equation}  \hat{m}_{0|k},\hat{m}_{1|k},\dotsc,\hat{m}_{k|k} \nonumber  \end{equation}  are constructed, given all the bits received up to time $k$. Hence, the processing operation for any message $m_i$ continues indefinitely for all $k \ge i $. 
A reliability level $\alpha$ is achieved in the given transmission system if for all $k$ the probability that there exists at least one message in the past whose estimate is incorrect decreases $\alpha$-exponentially with the number of bits received, namely   \begin{equation}  \label{eq:relia}  \mathbb{P}( (\hat{M}_{0|k},\dotsc,\hat{M}_{d|k}) \ne (M_0,\dotsc,M_d) ) = O( 2^{-\alpha d }) \quad \text{for all } d \leq k.  \end{equation}  The described communication system is then characterized by a rate-reliability pair~$(r, \alpha)$. It turns out that the ability to stabilize a dynamical system depends on the ability to construct such a communication system, in terms of achievable coding and decoding schemes, with given rate-reliability constraints.  Let the supremum of the rate $r$ that can be achieved with reliability $\alpha$ be the $\alpha$-anytime capacity $C_A(\alpha)$ of a given Markov channel with channel feedback. The problems of $\alpha$-reliable communication and $m$th moment stabilization are equivalent, as shown by the following.     \medskip  \begin{theorem}  \label{thm2}  The necessary and sufficient condition for $m$th moment stabilization of a scalar system with bounded disturbances and in the presence of channel output feedback over a Markov channel is  \begin{equation}  \log |\lambda| \lesssim C_A(m \log |\lambda|).  \label{anytime2}  \end{equation}  \end{theorem}  %  \medskip  \begin{proof}{[{\color{red}TO DO}]}  I think this should be immediate from Sahai's paper.  \end{proof}  \medskip  By combining Theorem~\ref{thm1} and Theorem~\ref{thm2}, we obtain the following result.   
\medskip  \begin{theorem}  The following holds:  \begin{enumerate}  \item Parametric characterization of the anytime capacity: For every $m >0$, the anytime capacity $C_A$ satisfies  \begin{equation}  C_A\bigl(m R(m) \bigr) = R(m),  \label{eq:anytime-param}  \end{equation}  i.e., for every $\alpha \ge 0$, there exists a unique $m(\alpha)$ such that  \begin{equation}  m(\alpha) R\bigl( m(\alpha) \bigr) = \alpha  \end{equation}  and  \begin{equation}  C_A(\alpha) = R\bigl( m(\alpha) \bigr).  \end{equation}  %  \item $C_A(\alpha)$ is a nonincreasing function of $\alpha > 0$.  \item Convergence to the Shannon capacity:  \begin{equation}  \label{eq:fact2C}  \lim_{\alpha \to 0} C_A(\alpha) = \sum_{i=1}^n \pi_i \bar{r}_i = C_{\text{Shannon}},  \end{equation}  %  \item Convergence to the Zero Error capacity: If $\bar{r}_1 = 0$, then   \begin{equation}  \label{eq:fact3a1}  C_A(\alpha) =0 = C_{ZE}, \quad \text{ for every } \alpha \ge \log(1/p_{11}).   \end{equation}  If, instead, $\bar{r}_1 \neq 0$, then $C_A(\alpha)$ has unbounded support and   \begin{equation}  \label{eq:fact3a2}  C_A(\alpha) \sim \bar{r}_1 \; \frac{\alpha}{\alpha- \log(1/p_{11}) } , \quad \text{ as } \alpha \to \infty,  \end{equation}  hence  \begin{equation}  \label{eq:fact3bC}  \lim_{\alpha \to \infty} C_A( \alpha) = \bar{r}_1 = C_{ZE}.  \end{equation}  %  \end{enumerate}  \end{theorem}  %  \medskip  \begin{proof}{[Sketch]}  By using Theorem~\ref{thm:Friedland}, it can be shown that the function $\phi(m)=mR(m)$ is increasing and strictly concave, thus invertible. It follows that for every $\alpha \ge 0$, there exists a unique $m := m(\alpha)$ such that  \begin{equation}  m R(m) = \alpha,  \end{equation}  hence assuming equality in~\eqref{eq:Rm}, it follows $C_A\bigl( m R(m) \bigr) = R(m)$. The remaining properties are immediate.  
\end{proof}  %  \medskip    \begin{example}  Suppose that $n=4$, $\mathcal{R}=\{1,3,4,5\}$ and $P$ is the $4\times 4$ stochastic matrix with first row equal to $\frac{1}{16}\left(1,13,1,1\right)$, i.e.,  \begin{equation}  \label{eq:exP}  P= \frac{1}{16}  \begin{bmatrix}  1 & 13 & 1 & 1 \\[0.3em]  13 & 1 & 1 & 1 \\[0.3em]  1 & 1 & 1 & 13 \\[0.3em]  1 & 1 & 13 & 1   \end{bmatrix}.   \end{equation}  In this case $C_{\text{Shannon}} = \frac{1}{4}(1+3+4+5) = \frac{13}{4}$ and $C_{ZE}=1$. Fig.~\ref{fig:t2} shows a numerical evaluation of $T(m)$ (with asymptotic approximation) and $C_A(\alpha)$ in this case. Notice that both curves have the same shape and they are in fact related by an affine transformation as $m$ grows. Notice the change of convexity for small values of $m$ and $\alpha$, as indication that $T(m)$ and $C_A(\alpha)$ are not convex functions in general.  \begin{figure}[H]  \begin{center}  \includegraphics[width=3.2in]{newt2.eps}  \includegraphics[width=3.2in]{c2.eps}  \end{center}  \caption{$T(m)$.} \label{fig:t2}  \end{figure}  \end{example}  \begin{example}  Suppose now that $\mathcal{R}=\{0,3,4,5\}$ and $P$ is as in~\eqref{eq:exP}. Notice that the only difference with the previous example is that $\bar{r}_1$ is now 0 instead of 1. In this case $C_{\text{Shannon}} = \frac{1}{4}(0+3+4+5) = 3$ and $C_{ZE}=0$. Fig.~\ref{fig:t1} shows a numerical evaluation of $T(m)$ (with asymptotic approximation) and $C_A(\alpha)$ in this case. Notice that $C_A(\alpha)$ is zero for $\alpha \ge - \log p_{11} = \log 16 = 4$.   
\begin{figure}[H]  \begin{center}  \includegraphics[width=3.2in]{newt1.eps}  \includegraphics[width=3.2in]{c1.eps}  \end{center}  \caption{$T(m)$.} \label{fig:t1}  \end{figure}  \end{example}  %%%%%%%%%%%%%%%%%%%%%%  \subsection{The Markov Erasure Channel}  Consider the special case of a two-state Markov process where $n=2$, $\mathcal{R}=\{0,\bar{r}\}$, $p_{12} = q$, and $p_{21}=p$ for some $0 < p,q < 1$, so that  \begin{align*}  P^\tra 2^{-mR}= \begin{pmatrix}  (1-q) & \frac{1}{2^{m\bar{r}}}p\\  q & \frac{1}{2^{m\bar{r}}}(1-p)\\  \end{pmatrix}.  \end{align*}  For this channel model we have the following result.  \medskip  \begin{theorem}  The anytime capacity of the Markov Erasure Channel is equal to   \begin{equation}  \label{eq:anyMarkov}  C_{A}(\alpha) = \frac{ \alpha \bar{r}}{ \alpha + \log_2 \left(\frac{ 1-p - 2^{\alpha}(1-p-q)}{1-(1-q)2^{-\alpha}}\right) },   \end{equation}  if $0\le \alpha < -\log_2 (1-q) $, and 0 otherwise.  \end{theorem}  \medskip  A few remarks are in order:  \begin{remark}  Eq.~\eqref{eq:anyMarkov} provides the anytime reliability of a binary erasure channel (BEC) with Markov erasures and with noiseless channel output feedback. If, in particular, we let $q=1-p$, then the erasure process becomes i.i.d. and~\eqref{eq:anyMarkov} recovers the anytime capacity of the memoryless BEC with erasure probability $p$ derived by Sahai~\cite[page 129]{Sahai2001} (in parametric form) and by Xu~\cite[Theorem 1.3]{Xu2005} (in non-parametric form):  \begin{equation}  \label{eq:anyBEC}  C_{A}(\alpha) = \frac{ \alpha }{ \alpha + \log_2 \left(\frac{ 1-p }{1-p2^{-\alpha}}\right) }.   \end{equation}  \end{remark}  \begin{remark}  Observe that $\lim_{\alpha \to 0} C_{A}(\alpha) = \frac{q}{p+q}\bar{r}=\mathbb{E}(R) = C_{\text{Shannon}}$, where the expectation is taken with respect to the stationary distribution of $P$. 
This limiting value is the Shannon capacity $C_{\text{Shannon}}$ of an $\bar{r}$-bit erasure channel with Markov erasures and with noiseless channel output feedback.   Moreover, $C_{A}(\alpha)=0$ for any $\alpha \ge -\log_2 (1-q) $ because the probability that $d$ consecutive zeros occur is $(1-q)^d= 2^{-d (-\log_2(1-q))}$.   \end{remark}  %  \begin{remark}  If we specialize to the case $n=2$, $\bar{r}_1=0$, $\bar{r}_2=r$, and an i.i.d. rate process with $P\{R_k = 0\}= p_1$ and $P\{R_k = r\}= p_2$ for all $k$'s, then   \begin{align*}  \la^m \left( p_1 + p_2 2^{-mr } \right)<1,  \end{align*}  which provides a converse to the achievable scheme in~\cite[Theorem 3.3]{Yuksel--Meyn2013}. If we further let $r \to \infty$, then the stability condition $p_1<1/\la^m$ depends only on the erasure rate of the channel. In this case, our condition generalizes the packet loss model result in~\cite{Gupta-etal-2007}.  \end{remark}  \begin{figure}[H]  \begin{center}  \includegraphics[scale=0.5]{BECMemory.eps}  \end{center}  \caption{Anytime reliability of a BEC with Markov erasure for different recovery probability $q$ and $p=0.8$.} \label{fig:scheme}  \end{figure}  TO DO: Similarly to what Sahai did in the case of the memoryless BEC in his CDC 2004 presentation (\url{http://www.eecs.berkeley.edu/~sahai/Presentations/CDC2004.pdf}), we can compare the anytime error exponent to error exponents of block codes. Specifically, we can compare the curves in Fig.~\ref{fig:scheme} obtained by evaluating~\eqref{eq:anyMarkov} with the results by Como, Y\"{u}ksel, and Tatikonda in \cite[Section VI]{Comoetal2009}.  \section{Auxiliary Results}  We begin with a theorem of Friedland on the log-convexity of the spectral radius  of a nonnegative matrix (superconvexity as Kingman~\cite{Kingman1961} called it).  \begin{theorem}[Friedland Theorem 4.2~\cite{Friedland1981}]  Let $\mathcal{D}_n$ be the set of $n\times n$ real-valued  diagonal matrices. Let $\rho(A)$ refer to the spectral radius of a matrix $A$. 
Let $A$  be a fixed $n\times n$ non-negative matrix having a positive spectral radius. Define  $\phi: \mathcal{D}_n \to \mathbb{R}$ by $\phi(D) := \log \rho(e^{D}A)$. Then $\phi(D)$ is a convex functional on $\mathcal{D}_n$.  Specifically: for every $D_1, D_2 \in \mathcal{D}_n$ and $\alpha \in (0,1)$,  \begin{equation}  \label{eq:Friedland}  \phi( \alpha D_1 + (1-\alpha)D_2) \le \alpha \phi(D_1) + (1-\alpha) \phi(D_2).   \end{equation}  Moreover, if $A$ is irreducible and the diagonal entries of $A$ are positive (or $A$ is fully indecomposable) then equality holds in~\eqref{eq:Friedland} if and only if  \begin{equation}  \label{eq:Friedland2}  D_1 - D_2 = c I   \end{equation}  for some $c \in \mathbb{R}$, where $I$ is the identity matrix.  \label{thm:Friedland}  \end{theorem}  \begin{theorem}[Cohen Theorem 1~\cite{Cohen1978}]  Let $A$ be a fixed $n\times n$ non-negative matrix having a positive spectral radius. Define $D:= \rho(A) I - A$, where $I$ denotes the $n\times n$ identity matrix. Then,  \begin{equation}  \label{eq:Cohen}  0< \frac{d \rho(A)}{d a_{11}} = \frac{| D(1) |}{\sum_{i=1}^n |D(i)|} < 1.  \end{equation}  \end{theorem}  \section{Extensions}  \subsection{More Results on Stability of MJLS}  \subsubsection{Strong $m$th Moment Stability}  We say that the system~\eqref{eq:mjls} is strongly $m$th moment stable  if there exists a finite $c_m \in \mathbb{R}$ such that  \begin{equation}  \label{eq:strong}  \E[ |Z_k|^m ] \to c_m, \qquad \text{ as } k \to \infty.   \end{equation}  Clearly, if an MJLS is strongly stable, then it is also weakly stable.  \begin{lemma}  \label{lemmamjls2}  For every positive integer $m =1,2,3,\dots$, the MJLS~\eqref{eq:mjls} is strongly $m$th moment stable if and only if   \begin{equation}  \label{eq:pp2b}  \la^m<\frac{1}{\rho(P^\tra 2^{-mR})}.   \end{equation}  \end{lemma}  \medskip  \begin{proof}  The proof technique is based on the binomial expansion and was presented in the CDC paper.  
\end{proof}  \appendices  \section{Proof of Proposition~\ref{prop1}}  \label{App1}  \begin{enumerate}  \item This is an immediate consequence of the log-convexity of the spectral radius of a nonnegative matrix. Let $D_1 = - m \; \text{diag}(\bar{r}_1,\cdots,\bar{r}_n) \; \ln 2$, $D_2 = 0_{n\times n}$ and $\alpha = \frac{n}{m}$. Notice that $P^\tra 2^{-mR} = P' e^{D_1}$, $\log \rho( P' e^{D_2} ) = 0$, and $H^{(n)} = P' e^{\alpha D_1}$. Then, for every $n < m$, by Theorem~\ref{thm:Friedland}  \begin{align}  \frac{n}{m} \log \rho( P^\tra 2^{-mR} )   & = \alpha \log \rho( P' e^{D_1} ) + (1-\alpha) \log \rho( P' e^{D_2} ) \nonumber \\   %  & > \log \rho( P' e^{\alpha D_1 + (1-\alpha) D_2} ) \nonumber \\  & = \log \rho( H^{(n)} ),   \end{align}  which implies $T(n) > T(m)$.  \item Next,  \begin{align*}  \lim_{m \to 0} R(m) & = \lim_{m \to 0} \log \rho\bigl( (P^\tra 2^{-mR})^{\frac{1}{m}} \bigr) \\  %  & = \log \rho\bigl( \lim_{m \to 0}(P^\tra 2^{-mR})^{\frac{1}{m}} \bigr) \\  %  & = \log \rho \left( \lim_{m \to 0} \left( \begin{bmatrix}  \pi_1 2^{m r_1} & \cdots & \pi_1 2^{m r_n} \\  \vdots & \vdots & \vdots \\  \pi_n 2^{m r_1} & \cdots & \pi_n 2^{m r_n}  \end{bmatrix}   \right)^{\frac{1}{m}} \right) \\  %  & = \lim_{m \to 0} \frac{1}{m}\log \rho \left( \begin{bmatrix}  \pi_1 2^{m r_1} & \cdots & \pi_1 2^{m r_n} \\  \vdots & \vdots & \vdots \\  \pi_n 2^{m r_1} & \cdots & \pi_n 2^{m r_n}  \end{bmatrix} \right) \\  %  & = \lim_{m \to 0} \frac{1}{m}\log \left( \sum_{i} \pi_i 2^{mr_i} \right)  \\  %  & = \sum_{i} \pi_i 2^{mr_i}   \end{align*}  \end{enumerate}  \begin{lemma}  \begin{equation*}  \lim_{m \to 0}(P^\tra 2^{-mR})^{\frac{1}{m}} = \lim_{m \to 0} \left( \begin{bmatrix}  \pi_1 2^{m r_1} & \cdots & \pi_1 2^{m r_n} \\  \vdots & \vdots & \vdots \\  \pi_n 2^{m r_1} & \cdots & \pi_n 2^{m r_n}  \end{bmatrix}   \right)^{\frac{1}{m}}   \end{equation*}  \end{lemma}  %  \begin{proof}  Let $1/m = k$. 
By the monotonicity property, it is sufficient to prove the claim for $k$ integer. Let   \begin{equation*}  A := \begin{bmatrix}  p_{1,1} 2^{r_1/k} & \cdots & p_{1,n} 2^{r_1/k} \\  \vdots & \vdots & \vdots \\  p_{n,1} 2^{r_n/k} & \cdots & p_{n,n} 2^{r_n/k}  \end{bmatrix} = \left(H^{-\frac{1}{k}}\right)^T   \end{equation*}  and  \begin{equation*}  B := \begin{bmatrix}  \pi_1 2^{r_1/k} & \cdots & \pi_n 2^{ r_1/k} \\  \vdots & \vdots & \vdots \\  \pi_1 2^{r_n/k} & \cdots & \pi_n 2^{ r_n/k}  \end{bmatrix} = \left(\begin{bmatrix}  \pi_1 2^{r_1/k} & \cdots & \pi_1 2^{ r_n/k} \\  \vdots & \vdots & \vdots \\  \pi_n 2^{ r_1/k} & \cdots & \pi_n 2^{ r_n/k}  \end{bmatrix} \right)^T   \end{equation*}  We prove that  \begin{equation*}  \lim_{k \to \infty} A^k = \lim_{k \to \infty} B^k.  \end{equation*}  It is enough to show that  \begin{equation*}  \lim_{k \to \infty} [A^k]_{i,j} = \lim_{k \to \infty} [B^k]_{i,j}.  \end{equation*}  Note that  \begin{align*}  [A^k]_{i,j} & = \sum_{l_1, \dots,l_{k-1}} \bigl( p_{i l_1} 2^{i/k} \bigr) \cdots \bigl( p_{l_{k-1} j} 2^{l_{k-1}/k} \bigr) \\  %  & = \sum_{l_1, \dots,l_{k-1}} \bigl( p_{i l_1} \cdots p_{l_{k-1}j} \bigr) 2^{ (i+ \cdots + l_{k-1})/k} \\  \end{align*}  and   \begin{align*}  [B^k]_{i,j} & = \sum_{l_1, \dots,l_{k-1}} \bigl( \pi_{l_1} 2^{i/k} \bigr) \cdots \bigl( \pi_{l_{j} } 2^{l_{k-1}/k} \bigr) \\  %  & = \sum_{l_1, \dots,l_{k-1}} \bigl( \pi_{ l_1} \cdots \pi_{l_{j}} \bigr) 2^{ (i+ \cdots + l_{k-1})/k} \\  \end{align*}  %  \begin{align*}  \lim_{k \to \infty} ( [A^k]_{i,j} -[B^k]_{i,j} ) & = \lim_{k \to \infty} \sum_{l_1, \dots,l_{k-1}} \bigl( p_{i l_1} \cdots p_{l_{k-1}j} - \pi_{ l_1} \cdots \pi_{l_{j}} \bigr) 2^{ (i+ \cdots + l_{k-1})/k} \\  %  & \le \lim_{k \to \infty} \sum_{l_1, \dots,l_{k-1}} \bigl( p_{i l_1} \cdots p_{l_{k-1}j} - \pi_{ l_1} \cdots \pi_{l_{j}} \bigr) 2^{ n} \\  & = 2^{ n} \left( \left( \lim_{k \to \infty} \sum_{l_1, \dots,l_{k-1}} p_{i l_1} \cdots p_{l_{k-1}j}\right) - \left( \lim_{k \to \infty} \sum_{l_1, 
\dots,l_{k-1}} \pi_{ l_1} \cdots \pi_{l_{j}} \right) \right) \\   %  & = 2^{ n} \left( \lim_{k \to \infty} [P^k]_{i,j} - \lim_{k \to \infty} [Q^k]_{i,j} \right) \\   & = 0,  \end{align*}  where the bound in the second step uses the fact that $(i+ \cdots + l_{k-1})/k< n$.  \end{proof}  \bibliographystyle{IEEEtran}  \bibliography{bib_control}  \end{document}