\documentclass[10pt]{article}
\usepackage{fullpage}
\usepackage{setspace}
\usepackage{parskip}
\usepackage{titlesec}
\usepackage[section]{placeins}
\usepackage{xcolor}
\usepackage{breakcites}
\usepackage{lineno}
\usepackage{hyphenat}
\PassOptionsToPackage{hyphens}{url}
\usepackage[colorlinks = true,
linkcolor = blue,
urlcolor = blue,
citecolor = blue,
anchorcolor = blue]{hyperref}
\usepackage{etoolbox}
\makeatletter
\patchcmd\@combinedblfloats{\box\@outputbox}{\unvbox\@outputbox}{}{%
\errmessage{\noexpand\@combinedblfloats could not be patched}%
}%
\makeatother
\usepackage[round]{natbib}
\let\cite\citep
\renewenvironment{abstract}
{{\bfseries\noindent{\abstractname}\par\nobreak}\footnotesize}
{\bigskip}
\titlespacing{\section}{0pt}{*3}{*1}
\titlespacing{\subsection}{0pt}{*2}{*0.5}
\titlespacing{\subsubsection}{0pt}{*1.5}{0pt}
\usepackage{authblk}
\usepackage{graphicx}
\usepackage[space]{grffile}
\usepackage{latexsym}
\usepackage{textcomp}
\usepackage{longtable}
\usepackage{tabulary}
\usepackage{booktabs,array,multirow}
\usepackage{amsfonts,amsmath,amssymb}
\providecommand\citet{\cite}
\providecommand\citep{\cite}
\providecommand\citealt{\cite}
% You can conditionalize code for latexml or normal latex using this.
\newif\iflatexml\latexmlfalse
\providecommand{\tightlist}{\setlength{\itemsep}{0pt}\setlength{\parskip}{0pt}}%
\AtBeginDocument{\DeclareGraphicsExtensions{.pdf,.PDF,.eps,.EPS,.png,.PNG,.tif,.TIF,.jpg,.JPG,.jpeg,.JPEG}}
\usepackage[utf8]{inputenc}
\usepackage[english]{babel}
\begin{document}
\title{SHARP}
\author[1]{luca cattivelli}%
\affil[1]{School of Education Pisa}%
\vspace{-1em}
\date{\today}
\begingroup
\let\center\flushleft
\let\endcenter\endflushleft
\maketitle
\endgroup
\sloppy
% NOTE(review): duplicate preamble from a merged file. \documentclass cannot
% appear after \begin{document} (issued above) and makes the file uncompilable;
% these class options and the \usepackage lines that follow must be merged into
% the single preamble at the top of the file.
%\documentclass[onecolumn,preprintnumbers,amsmath,amssymb,superscriptaddress,nofootinbib,12pt]{article}
\usepackage[english]{babel}
\usepackage{amsmath}
\usepackage{amsthm}
\usepackage{multirow}
\usepackage{amsfonts}
\usepackage{amssymb}
\usepackage[utf8x]{inputenc}
\usepackage{csquotes}
\usepackage{newlfont}
\usepackage{mathrsfs}
\usepackage{multirow}
\usepackage{adjustbox}
\usepackage[titletoc,title]{appendix}
\usepackage{lipsum}
\usepackage{authblk}
\usepackage{relsize}
\usepackage{graphicx}% Include figure files
\usepackage{dcolumn}% Align table columns on decimal point
\usepackage{bm}% bold math
\usepackage{amscd}
\usepackage{bbold}
\usepackage{hyperref}
\usepackage{natbib}
\usepackage{mathtools}
\usepackage{epstopdf}
\usepackage{rotating}% http://ctan.org/pkg/rotating
%\usepackage[a4paper]{geometry}
%\usepackage{subfigure}
\usepackage{subcaption}
\usepackage{caption}
\usepackage{color}
\newtheorem{lemma}{Lemma}
\newtheorem{theorem}{Theorem}
\newtheorem{proposition}{Proposition}
\usepackage[printwatermark]{xwatermark}
\usepackage[dvipsnames]{xcolor}
\usepackage{scalerel,stackengine}
%\usepackage[a4paper, total={6in, 8in}]{geometry}
\usepackage{authblk}
% \argmin: upright "argmin" operator whose subscript is set below the operator
% in display style. \DeclareMathOperator* (amsmath) replaces the obsolete
% \operatornamewithlimits and gives correct operator spacing automatically.
\DeclareMathOperator*{\argmin}{argmin}
\usepackage{booktabs}
\usepackage{setspace}
\onehalfspacing
\usepackage{geometry}
%\geometry{legalpaper, landscape, margin=2in}
\stackMath
% \reallywidehat{<expr>}: a hat accent that stretches over the full width of
% its argument, built from a horizontally scaled \bigwedge via the scalerel
% and stackengine packages (requires the \stackMath switch set above).
\newcommand\reallywidehat[1]{%
% Save into \tmpbox a \bigwedge fitted to the width of #1 ...
\savestack{\tmpbox}{\stretchto{%
\scaleto{%
\scalerel*[\widthof{\ensuremath{#1}}]{\kern-.6pt\bigwedge\kern-.6pt}%
{\rule[-\textheight/2]{1ex}{\textheight}}%WIDTH-LIMITED BIG WEDGE
}{\textheight}%
}{0.5ex}}% ... then vertically squashed to a height of 0.5ex
% Place the flattened wedge 1pt above the argument:
\stackon[1pt]{#1}{\tmpbox}%
}
% ---------------------------------------------------------------------------
% \widebar{<arg>}: slant-corrected replacement for \overline. The bar is
% shifted horizontally using the accent kern (\macc@kerna) that amsmath
% computes, so it sits correctly over slanted/italic symbols. This is the
% widely circulated low-level TeX macro by H.~Vogt; the code is intricate and
% order-sensitive, so it is left byte-identical here — comments only added.
% ---------------------------------------------------------------------------
\makeatletter
\let\save@mathaccent\mathaccent
% \if@single{<arg>}{<multi>}{<single>}: expand <single> if <arg> behaves as a
% single symbol under the accent (same height with and without \kern0pt),
% otherwise expand <multi>.
\newcommand*\if@single[3]{%
\setbox0\hbox{${\mathaccent"0362{#1}}^H$}%
\setbox2\hbox{${\mathaccent"0362{\kern0pt#1}}^H$}%
\ifdim\ht0=\ht2 #3\else #2\fi
}
%The bar will be moved to the right by a half of \macc@kerna, which is computed by amsmath:
\newcommand*\rel@kern[1]{\kern#1\dimexpr\macc@kerna}
%If there's a superscript following the bar, then no negative kern may follow the bar;
%an additional {} makes sure that the superscript is high enough in this case:
\newcommand*\widebar[1]{\@ifnextchar^{{\wide@bar{#1}{0}}}{\wide@bar{#1}{1}}}
%Use a separate algorithm for single symbols:
\newcommand*\wide@bar[2]{\if@single{#1}{\wide@bar@{#1}{#2}{1}}{\wide@bar@{#1}{#2}{2}}}
% \wide@bar@{<arg>}{<no-superscript flag>}{<1=single symbol, 2=multi>}:
% temporarily redefines \mathaccent so that amsmath's accent machinery
% computes the kerning, then draws the bar with \overline.
\newcommand*\wide@bar@[3]{%
\begingroup
\def\mathaccent##1##2{%
%Enable nesting of accents:
\let\mathaccent\save@mathaccent
%If there's more than a single symbol, use the first character instead (see below):
\if#32 \let\macc@nucleus\first@char \fi
%Determine the italic correction:
\setbox\z@\hbox{$\macc@style{\macc@nucleus}_{}$}%
\setbox\tw@\hbox{$\macc@style{\macc@nucleus}{}_{}$}%
\dimen@\wd\tw@
\advance\dimen@-\wd\z@
%Now \dimen@ is the italic correction of the symbol.
\divide\dimen@ 3
\@tempdima\wd\tw@
\advance\@tempdima-\scriptspace
%Now \@tempdima is the width of the symbol.
\divide\@tempdima 10
\advance\dimen@-\@tempdima
%Now \dimen@ = (italic correction / 3) - (Breite / 10)
\ifdim\dimen@>\z@ \dimen@0pt\fi
%The bar will be shortened in the case \dimen@<0 !
\rel@kern{0.6}\kern-\dimen@
\if#31
\overline{\rel@kern{-0.6}\kern\dimen@\macc@nucleus\rel@kern{0.4}\kern\dimen@}%
\advance\dimen@0.4\dimexpr\macc@kerna
%Place the combined final kern (-\dimen@) if it is >0 or if a superscript follows:
\let\final@kern#2%
\ifdim\dimen@<\z@ \let\final@kern1\fi
\if\final@kern1 \kern-\dimen@\fi
\else
\overline{\rel@kern{-0.6}\kern\dimen@#1}%
\fi
}%
\macc@depth\@ne
\let\math@bgroup\@empty \let\math@egroup\macc@set@skewchar
\mathsurround\z@ \frozen@everymath{\mathgroup\macc@group\relax}%
\macc@set@skewchar\relax
\let\mathaccentV\macc@nested@a
%The following initialises \macc@kerna and calls \mathaccent:
\if#31
\macc@nested@a\relax111{#1}%
\else
%If the argument consists of more than one symbol, and if the first token is
%a letter, use that letter for the computations:
\def\gobble@till@marker##1\endmarker{}%
\futurelet\first@char\gobble@till@marker#1\endmarker
\ifcat\noexpand\first@char A\else
\def\first@char{}%
\fi
\macc@nested@a\relax111{\first@char}%
\fi
\endgroup
}
\makeatother
%\usepackage{draftwatermark}
%\SetWatermarkText{Preliminary draft: do not circulate!}
%\SetWatermarkScale{0.3}
%\SetWatermarkColor[rgb]{0.9,0.9,0.9}
% ----- Shorthand macros used throughout the paper (comments only added) -----
% Reviewer/author margin note, rendered in red:
\newcommand{\nota}[1]{\textcolor{red}{(#1)}}
% Upright letters for the ask price, bid price and quoted spread:
\newcommand{\ASK}{\textrm{A}}
\newcommand{\BID}{\textrm{B}}
\newcommand{\Q}{\textrm{Q}}
% "Equal by definition" symbol:
\newcommand{\df}{\stackrel{\text{def}}{=}}
% Probability, expectation and conditional probability with auto-sized brackets:
\newcommand{\prob}[1]{\mathbb{P}\left[#1\right]}
\newcommand{\E}[1]{\mathbb{E}\left[#1\right]}
\newcommand{\condprob}[2]{\mathbb{P}\left[#1\;\middle\vert\;#2\right]}
% Auto-sized absolute value, parentheses (tonde), square brackets (quadre)
% and braces (graffe):
\newcommand{\abs}[1]{\left|#1\right|}
\newcommand{\tonde}[1]{\left(#1\right)}
\newcommand{\quadre}[1]{\left[#1\right]}
\newcommand{\graffe}[1]{\left\{#1\right\}}
% Column vectors with 2, 3 or 4 entries and a 2x2 matrix, in round brackets:
\newcommand{\due}[2]{\left(\begin{array}{c}#1\\#2\end{array}\right)}
\newcommand{\duedue}[4]{\left(\begin{array}{cc}#1 & #2\\#3 & #4\end{array}\right)}
\newcommand{\tre}[3]{\left(\begin{array}{c}#1\\#2\\#3\end{array}\right)}
\newcommand{\quattro}[4]{\left(\begin{array}{c}#1\\#2\\#3\\#4\end{array}\right)}
% Unnumbered line break inside multi-line equations:
\newcommand{\accapo}{\nonumber\\}
% Auto-sized conditioning bar for conditional expressions:
\newcommand{\cond}{\;\middle\vert\;}
% First author's annotation macro (red, framed label):
\newcommand{\luca}[1]{\textcolor{red}{\fbox{LUCA:} #1}}
% begin/end shortcuts for common math and list environments:
\newcommand{\be}{\begin{equation}}
\newcommand{\ee}{\end{equation}}
\newcommand{\ba}{\begin{eqnarray}}
\newcommand{\ea}{\end{eqnarray}}
\newcommand{\baa}{\begin{eqnarray*}}
\newcommand{\eaa}{\end{eqnarray*}}
\newcommand{\bi}{\begin{itemize}}
\newcommand{\ei}{\end{itemize}}
\newcommand{\ben}{\begin{enumerate}}
\newcommand{\een}{\end{enumerate}}
% Notation shortcuts: intraday pattern, time increments and implication arrows:
\newcommand{\pnt}{\varphi_{n,T}}
\newcommand{\dt}{\delta t}
\newcommand{\Dt}{\Delta t}
\newcommand{\Lra}{\Leftrightarrow}
\newcommand{\Ra}{\Rightarrow}
\newcommand{\ra}{\rightarrow}
% Second author's annotation macro (red, framed label):
\newcommand{\dav}[1]{\textcolor{red}{\fbox{DAV:} #1}}
% begin/end shortcuts for the theorem-like environments declared above:
\newcommand{\bteo}{\begin{theorem}}
\newcommand{\eteo}{\end{theorem}}
\newcommand{\bprop}{\begin{proposition}}
\newcommand{\eprop}{\end{proposition}}
\newcommand{\brem}{\begin{remark}}
\newcommand{\erem}{\end{remark}}
\newcommand{\blem}{\begin{lemma}}
\newcommand{\elem}{\end{lemma}}
\newcommand{\bproof}{\begin{proof}}
\newcommand{\eproof}{\end{proof}}
\newcommand{\defbeg}{\begin{definition}}
\newcommand{\defend}{\end{definition}}
% Numbered "Definition" environment used for the SHARP definition below:
\newtheorem{mydef}{Definition}
\pagestyle{myheadings}
\definecolor{lightgray}{gray}{0.9}
% NOTE(review): duplicate \begin{document} — the document environment was
% already opened above; commented out to avoid a LaTeX error.
%\begin{document}
%% TITOLO!!
%\begin{center}
%\title{A SHARP model of bid-ask spread forecasts\footnote{
%All routines used in this paper are freely available in a form of a Matlab package. This version of the paper is a blinded version so any reference to authors and routines has been temporarily removed.
%}}
%\end{center}
% SHARP: Seasonal Heterogeneous AutoRegressive Poisson.
%\date{\today}
%\begin{titlepage}
%\author{ }
%\email[]{luca.cattivelli@sns.it}
%\affiliation{Scuola Normale Superiore, Pisa, Italy}
%\author{Davide Pirino}
%\email[]{davide.pirino@sns.it}
%\affiliation{Scuola Normale Superiore, Pisa, Italy}
%\begin{center}
%\Large
%\bf{THIS IS A PRELIMINARY VERSION OF THE PAPER: PLEASE DO NOT CIRCULATE\\}
%\normalsize
%\bf{For a more recent version of this paper, please contact Luca Cattivelli at luca.cattivelli@sns.it}
%\end{center}
%\maketitle
%\vspace{2cm}
%\begin{abstract}
%\nota{ NOTA: l'abstract in IJoF deve avere tra 100 e 150 parole (https://www.elsevier.com/journals/international-journal-of-forecasting/0169-2070/guide-for-authors). Questo abstract ha 224 parole. Bisogna quindi sintetizzare un po'.
%Ho messo in rosso quello che secondo me puo' essere tolto o ridotto. Ho poi aggiunto un possibile abtract con meno di 150 parole. }
%In this paper we propose an accurate, parsimonious and fast-to-estimate
%forecasting model suitable for discrete-valued time series characterized by
%long memory and seasonality.
%The modelling is achieved through a discrete-time
%Poisson process with a predictable
%stochastic intensity determined by two factors: a seasonal intraday pattern
%and a heterogeneous autoregressive
%component. For this reason,
%we name the model SHARP, which is an
%acronym for Seasonal Heterogeneous Auto-Regressive Poisson.
%\nota{ QUI TAGLIEREI: Motivated by the prominent role of the
%bid-ask spread as a transaction cost for trading,
%we discuss how the SHARP model can
%be estimated to achieve reliable forecasts of bid-ask spreads of NYSE equity stocks.
%As standard in this literature, the model is defined and estimated on
%an equi-spaced time grid. This choice unavoidably brings a loss of information, since
%the dynamics of the historical time series between two consecutive instants of the grid
%is discarded. For this reason} we present an extension of the SHARP model,
%based on the same rationale of the mixed-data sampling of \cite{ghysels2007midas}, which
%adopts more efficiently the historical information flow and provides, empirically, the best (among all the
%models considered) forecasting performances.
%We conclude by showing how bid-ask spread forecasts based on the SHARP model can be exploited to reduce the
%total cost incurred by a trader that is willing to buy or sell a given amount of an equity stock.
% \\
% \\
%\\
%% ABSTRACT!!!!
%In this paper we propose an accurate, parsimonious and fast-to-estimate
%forecasting model for discrete-valued time series with long memory and seasonality.
%The modelling is achieved through an autoregressive
%Poisson process with a predictable
%stochastic intensity determined by two factors: a seasonal intraday pattern
%and a heterogeneous autoregressive
%component. We name the model SHARP, which is an
%acronym for Seasonal Heterogeneous Auto-Regressive Poisson.
%We also present a mixed-data sampling extension of the model, which
%adopts more efficiently the historical information flow and provides empirically the best (among all the
%models considered) forecasting performances for the bid-ask spreads of NYSE equity stocks.
%We conclude by showing how bid-ask spread forecasts based on the SHARP model can be exploited to reduce the
%total cost incurred by a trader that is willing to buy or sell a given amount of an equity stock.
%\end{abstract}
%
%\medskip \textbf{JEL codes: C02; C58; C87; C53}
%
%\medskip \textbf{Keywords: bid-ask spread; forecasting; liquidity; long-memory; seasonality; integer-valued; econometric models}
%\end{titlepage}
%\clearpage
\section*{Introduction}
It is widely accepted that, at moderate sampling frequencies,
%the dynamics of financial prices is well described
the dynamics of asset prices are well described
by an It\^o-semimartingale process.
Nevertheless, at high frequencies (e.g. at one minute or more), price paths move away from this assumption: the discrete nature of prices clearly arises and the modelling of the price variations with continuous-valued processes (as implied by the Ito-semimartingale assumption) can lead to a severe misspecification of the model. The same reasoning can
be straightforwardly applied to the modelling of the bid-ask spread dynamics as well as several other important financial variables, such as the number of transactions in a given time interval or the number of traded shares per transaction,
all of which are discrete-valued stochastic processes.
The discreteness is not the only relevant feature of these variables, indeed they are also typically characterized by a strong intraday pattern and strong persistence, e.g. see \cite{brownlees2011intra} for the volume, \cite{gross2013predicting} for the spread and \cite{andersen1997intraday} for the magnitude of the price variations.
In this paper, we put our focus on bid-ask spreads of equity stocks. A forecasting model for the spread is of great interest for the large number of high frequency traders active nowadays in the market. These traders are usually interested in anticipating transaction costs (e.g. the quoted spread) for the purpose of minimizing execution costs. This interest is shared by the academic literature, as witnessed by the vast number of contributions on
optimal executions of orders \citep[see, among others,][]{almgren2001optimal,alfonsi2010optimal,almgren2003optimal,predoiu2011optimal,gatheral2011optimal}. %These studies typically achieve optimal execution by minimizing volatility risk and transaction costs arising from permanent and temporary market impact.
In this context, the bid-ask spread plays a prominent role since it is one of the main execution costs: in fact, it is the cost of immediate trading when the trader places a market order.
For this reason, traders prefer using limit orders when the spread is large and market orders when the spread is small. Empirically, it has been found that the proportion of limit orders is positively related to the size of the spread \citep{foucault2005limit}.
Despite its prominent role for trading\footnote{Spread predictions are also relevant in the context of risk measures. One notable example is the liquidity-adjusted intraday Value-at-Risk of \cite{weiss2013forecasting} in which a joint modelling of bid-ask spread and log returns is used for the prediction of three types of liquidity-adjusted intraday VaR's. With respect to usual risk measures, this approach has the advantage of taking into account the liquidity risk, which is of great concern to both portfolio managers and investors.}, the literature on bid-ask spread forecasting is not particularly vast.
Nevertheless, in the past two decades, two important contributions on bid-ask spread forecasting, one by \cite{gross2013predicting} and another one by \cite{taylor2002economic}, have partially filled this gap. In \cite{taylor2002economic} the unrestricted vector autoregression (VAR) model of \cite{huang1999fx} is used to generate intraday forecasts of spreads. The authors showed that the unrestricted VAR model allows the investor to reduce spread costs by 35\%. This model forecasts future spreads using the past level of five predictors: the spread itself, inter-dealer competition (measured as the number of different dealers placing orders in a given time interval), return volatility, trade volume and trade intensity.
In \cite{gross2013predicting}, the authors introduce the Long-Memory Autoregressive Conditional Poisson (LMACP) model, which incorporates all the salient features of the bid-ask spread, that is the strong autocorrelation, the seasonality and the discreteness of observations. They show that the LMACP model significantly outperforms forecasts from AR, ARMA, ARFIMA, ACD and FIACD models. They further prove that the spread predictions obtained with the LMACP allow the trader to decrease spread transaction costs up to $14\%$.
The main purpose of this paper is to design a parsimonious model for integer-valued
stochastic processes characterized by seasonality and long-memory, which is easily tractable, accurate and fast to estimate.
We lay the foundations of our framework on the Autoregressive Conditional Poisson (ACP) process, a versatile model for integer random stochastic processes\footnote{The ACP process has been studied in the literature with different names: INGARCH in \cite{ferland2006integer}, CBIN in \cite{davis2001cbin} and Autoregressive Conditional Poisson process in \cite{heinen2003modelling,heinen2007multivariate}.}.
As mentioned above, we introduce both a seasonal component in the model (apt to fit intraday patterns) and a heterogeneous autoregressive (HAR) specification \citep[\`a la][]{corsi2009simple} for the dynamics over the intraday pattern. We name the model SHARP, which is the acronym for Seasonal Heterogeneous Auto-Regressive Poisson.
The adoption of heterogeneous components has the advantage of keeping the estimation fast and, most importantly, very accurate.
In fact, even if from a mathematical point of view
the HAR specification does not define a long-memory process, it produces
slowly decaying memory patterns that are indistinguishable from those observed in data.
This is a great advantage over models, such as fractionally integrated models, which are nontrivial to estimate
and not easily extendible to multivariate processes.
We validate the goodness of our minimal approach by showing that the forecasting accuracy of our model outperforms that of a genuine long-memory model, the LMACP proposed in \cite{gross2013predicting}, and that of other simpler models for the dynamics of the bid-ask spread.
This said, the SHARP, like all the discrete-time models discussed so far,
faces the limitation of being implemented
on an evenly spaced time grid, hence only a fraction of the total
information generated by the historical time series is used.
In order to overcome this limitation, we propose an extension of the SHARP
model in which the information flow generated by the spread between two consecutive points of the time grid is used
as an additional source of information, as in the MIDAS approach by \cite{ghysels2007midas}.
We prove that the new extended model (that we call MIDAS-SHARP or mSHARP)
so obtained shows superior forecasting accuracy with respect to all the other models considered, including
the SHARP.
Finally, as an empirical application, we show how spread forecasts provided by the SHARP can
be exploited for reducing the total transaction costs of a trading strategy.
In particular, we prove that a trader who schedules trades according to the SHARP spread forecasts is able
to reduce execution costs significantly (in a statistical sense)
with respect to other benchmark strategies that do not profit
from the SHARP predictions.
This paper is organized as follows: Section \ref{sec:spread_properties}
provides a description of the salient
stylized facts for time series of bid-ask spreads. Section \ref{sec:mio} describes in detail our
new econometric model, the SHARP. Two possible estimation procedures (maximum likelihood and ordinary least squares)
for the SHARP are presented in Section \ref{sec:est} while
Section \ref{sec:MIDAS-SHARP} is dedicated to the
extension called MIDAS-SHARP.
Then, we assess, with a comparative exercise, the forecasting performances
of the SHARP (and of the mSHARP) in Section \ref{sec:Empirical}. In the final
part of Section \ref{sec:Empirical}
we prove how the spread
predictions based on the SHARP model can be used
to reduce the costs of a trading strategy. Finally, Section \ref{sec:conclusions} concludes.
\section*{ The Dataset: Stylized Facts}\label{sec:spread_properties}
We start our discussion with the description of the major stylized facts
that characterize series of bid-ask spreads of equity stocks.
For this purpose, we select ten representative stocks from a
large dataset of all quote updates\footnote{Data source: Thomson Reuters. Prior to the empirical analysis, Ask and Bid prices have been corrected for the presence of outliers with the
procedure proposed by \cite{brownlees2006financial}. } of 244 NYSE stocks.
These 244 are chosen as the most liquid (in terms of total volume from 2006 to 2014) stocks of the NYSE. We divide them into 10 quantiles in terms of their average quoted
spread\footnote{The average quoted Spread $\bar{q}_J$ is defined as $\bar{q}_{J} = 1/(D\,J) \, \sum_{t=1}^{D\,J} Q_{t} = 100/(D\,J) \, \sum_{t=1}^{D\,J} (\ASK_{t}-\BID_{t})$, where $D$ is the number of trading days in the considered year, $J=390$ is the number of intraday observations of the ask price $\ASK_{t}$ and of the bid price $\BID_{t}$ at a one-minute frequency, $Q_{t}=\ASK_{t}-\BID_{t}$ is the quoted spread in dollar cents and $t$ runs over all the
minutes without distinguishing
between consecutive days.} in 2014. Within these 10 groups, the dynamics of the
bid-ask spread is quite different. In particular, stocks with a small average spread
show a quoted bid-ask spread almost always equal to $0.01\$$, while stocks with a large average spread are characterized by a
much more volatile bid-ask spread dynamics. For each quantile, we selected the most
representative stock by choosing that with the highest median (daily traded)
volume (in 2014). This filtering procedure returns the ten tickers (ordered from the first to the last
quantile) BAC, VZ, GM, DAL, HAL, XOM, VLO, CVX, APC and IBM.
By construction, the selected stocks cover almost
all the spread sizes observed in the market, or at least in our
original dataset of 244 NYSE stocks. Table \ref{table:stocks} reports some
summary statistics of the ten selected stocks computed using all the data of 2014.
In particular, we report the average time $\left<\Delta t\right>$ between two consecutive quote updates. Not surprisingly, small tick stocks (such as IBM) show
rapid (more precisely less than one second) spread updates, while large tick ones (such as BAC) are characterized
by less frequent spread changes\footnote{Large tick stocks are defined as stocks for which the quoted bid-ask spread is
almost always equal to one tick, while small tick stocks are characterized by spreads of a few ticks \citep{eisler2012price,dayri2015large}.}. Finally, note that for the mean, the standard deviation and the maximum of the quoted spread reported in Table \ref{table:stocks},
we distinguish between the spread sampled
every minute and every five seconds. This distinction is
necessary since, in the forecasting exercise, we treat the two frequencies
separately.
\begin{table}[htbp]
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{r r r r r r r r r r r}
%\multicolumn{11}{c}{}\\
%\multicolumn{11}{c}{Average Statistics.}\\
\multicolumn{11}{c}{\textbf{Summary Statistics}}\\
\hline
& & & & & & & & & \\
&BAC &VZ &GM &DAL &HAL &XOM &VLO &CVX &APC &IBM\\
\cline{2-11}\\
Median volume ($\times 10^{6}$)&14.64&3.36&3.08&2.16&1.84&2.74&1.30&1.60&0.95&0.88\\
Mean price &16.34&48.61&34.58&37.57&58.69&97.30&51.30&120.54&94.77&182.32\\
$\left<\#\text{ transactions}\right>$ ($\times 10^{3}$) &6.10&6.29&5.93&6.28&8.03&8.76&4.73&7.04&5.09&4.22\\
$\sqrt{RV\times252}\times 100 $&16.47&12.81&20.30&29.14&25.78&14.07&26.35&14.18&26.21&11.82\\
$\left<\Delta t\right>$ & 12.99 & 2.88 & 2.73 & 1.16 &0.95 & 0.74 & 0.96 & 0.58 & 0.82 &0.91\\
%mean Spread &1.15&1.31&1.41&1.54&1.99&2.10&2.56&3.01&5.63&7.92\\
& \\
& \multicolumn{10}{c}{Quoted spread at one-minute frequency}\\
\cline{2-11}\\
Mean & 1.01 &1.11 &1.13 &1.34 &1.68 &1.72 &2.23 &2.51 &4.57 &6.53\\
Standard deviation & 0.11 &0.37 &0.38 &0.63 &1.01 &1.19 &1.43 &1.85 &3.58 &4.66\\
Maximum &3 &17 &10 &29 &28 &30 &28 &42 &76 &94\\
\\
& \multicolumn{10}{c}{Quoted spread at five-second frequency}\\
\cline{2-11}\\
Mean & 1.01 &1.11 &1.12 &1.31 &1.67 &1.71&2.12 &2.50&4.54&6.47\\
Standard deviation & 0.11&0.36 &0.36 &0.61 &0.97 &1.15 &1.39&1.78 &3.48 &4.58\\
Maximum &3 &17 &10 &29 &29 &40 &35 &72 &76 &99\\
& & & & & & & & & & \\
\hline
\end{tabular}
}
\end{center}
\caption{\footnotesize{This table reports, in order from the first to the last row, the median daily volume (in number of shares), the average daily closing price, the average number of daily transactions (indicated with $\left<\#\text{ transactions}\right>$), the average annualized (five-minute) realized volatility and, with $\left<\Delta t\right>$, we indicate the average time (in seconds) for a change in the spread. Finally, the last six rows report, in order, the average, the standard deviation and the maximum of the quoted bid-ask spread (expressed in dollar cents) at one-minute frequency and at five-second frequency.}}\label{table:stocks}
\end{table}
We limit the estimation of the models and the forecasting exercise
to the last year in the sample, i.e.\ 2014. Since
we deal with high-frequency data,
this is largely enough to achieve a reliable estimation of the models
and to test the forecasting accuracy of each of them.
Hence, our sample is made of $D=244$ days.
We frame all models on an equispaced temporal grid, dividing each
day into $J$ equispaced periods.
As anticipated above, we estimate
the models and perform the corresponding
forecasts using two frequencies: one minute and five seconds.
The former case is achieved by choosing $J=390$\footnote{The NYSE trading day starts at 9:30 AM and closes at 4:00 PM, corresponding to 6 hours and 30 minutes of trading, thus $J=6\times 60 +30 = 390$. } while the latter
corresponds to $J=390\times 60/5 = 4\,680$. For a given $J$,
we observe $\ASK_t$ and $\BID_t$, respectively, the ask and the
bid price prevailing at time $t$, where $t$ is the discrete-time index
\be\label{eq:tinset}
t\in\mathcal{T}\stackrel{\text{def}}{=}\graffe{1,...,J,J+1,...,2\,J,...,D\cdot J},
\ee
said differently $t$ runs over all the
elements of the time grid $\mathcal{T}$, without distinguishing
between consecutive days. We finally introduce the bid-ask spread $S_{t}$ as
\[
S_{t}\df100\,(\ASK_{t}-\BID_{t})-1.
\]
Hence $S_{t}$ is defined as the number of price
intervals of size $0.01\$ $ between the ask price $\ASK_{t}$ and
the bid price $\BID_t$, minus one\footnote{Trivially, the quoted spread $Q_t$ (expressed in dollar cents)
is equal to $Q_t=S_t+1$.}. When the ask and the bid are separated
by the smallest possible distance, that is $0.01\$ $, the variable $S_t$ is zero.
We prefer working with this quantity instead
of the quoted spread $Q_t=S_t+1$ for modelling purposes. In fact, in Section \ref{sec:mio}, we model the conditional
distribution of the spread with a Poisson
distribution, which has support on the set of non-negative integer numbers.
The high frequency dynamics of $S_t$ has well-known empirical regularities,
which are summarized in Figure \ref{fig:empirical} for the case of XOM, although they are
pretty similar for all the ten assets in the sample.
First of all (top-left panel of Figure \ref{fig:empirical}) it is a discrete-valued process.
Secondly, it shows a strong intraday seasonality: the top-right plot in Figure \ref{fig:empirical} reports, for the case of the five-second grid (i.e. $J=4\,680$) and as a function of the intraday index $j_t=t - \left \lfloor{\frac{t}{J}}\right \rfloor \cdot J \in \graffe{1,...,J}$, the sample mean
\be\label{eq:phi_hat}
\widehat{\varphi}_{j_t}=\frac{1}{D} \sum_{d=0}^{D-1} S_{j_t+d\,J},
\ee
which is our estimator for the unconditional expected value of the spread in the $j_t$-th time instant of the day, i.e. for the intraday seasonal pattern.
The seasonality can be noticed also from the strong periodicity of the autocorrelation of $S_t$
(bottom-left of Figure \ref{fig:empirical}). Finally, the bottom-right plot in Figure \ref{fig:empirical} reports the autocorrelation function
for the de-seasonalized time series $S_t/\widehat{\varphi}_{j_t}$ which clearly shows a strong persistence\footnote{Given the periodicity of the intraday pattern $\varphi$, the estimator $\widehat{\varphi}_{j_t}$, for $t\in\mathcal{T}$ and $t>J$ is defined as $\widehat{\varphi}_{j_t}=\widehat{\varphi}_{t - \left \lfloor{\frac{t}{J}}\right \rfloor \cdot J}$.}.
The empirical evidence described so far suggests that any forecasting model for the bid-ask spreads of equity stocks
should unavoidably incorporate discreteness, intraday seasonality and persistence. We turn now to this point
describing in detail our modelling framework.
\begin{figure}[ht!]
\centering
\includegraphics[scale=0.47]{images/empirical_regularites_new_notation_XOM.eps}
\caption{\footnotesize{In this figure we show some empirical regularities of the five-second bid-ask spread for XOM. We use the first thirty trading days of the year, from January 2, 2014, to February 13, 2014. We report four graphs: 1) (top-left) the spread series on January 8, 2014; note that it is a discrete-valued stochastic process, that is $S_t \in \mathbb{N}_0\,\forall t$; 2) (top-right) the intraday pattern $\varphi_{j_t}$, which is estimated as the average spread: $\widehat{\varphi}_{j_t}=\frac{1}{D} \sum_{d=0}^{D-1} S_{j_t+d\,J}$, and then smoothed with a moving average filter \nota{Piu dettagli sullo smoother!}; 3) (bottom-left) the autocorrelation function of $S_t$; 4) (bottom-right) the autocorrelation function of the de-seasonalized time series $S_t/\widehat{\varphi}_{j_t}$, where $\widehat{\varphi}_{j_t}=\widehat{\varphi}_{t - \left \lfloor{t/J}\right \rfloor \cdot J}$ indicates the intraday seasonal pattern associated with the spread $S_t$.
In all plots, when they appear, dashed lines represent 95\% and 5\% confidence intervals.}}
\label{fig:empirical}
\end{figure}
\section*{The Seasonal Heterogeneous Auto-Regressive Poisson model} \label{sec:mio}
\nota{Accorcia la parte teorica!}
In this section we formally introduce the
SHARP model. As anticipated in the introduction, we develop
our framework for time-series (in particular for the intraday
bid-ask spread of equity stocks) that are integer-valued,
seasonal and strongly persistent.
Besides, we frame our model
in discrete time, a common choice in the literature on time-series
forecasting.
Consider an integer-valued stochastic
process $\tonde{S_t}_{t\in\mathcal{T}}$ sampled on the discrete-time
grid $\mathcal{T}$ defined in \eqref{eq:tinset} and let $\mathcal{F}_t$ be its natural filtration.
The definition of the SHARP process that we give below is generic and can be adapted to any
sampling frequency, hence we leave unspecified the number $J$ of intraday observations that appear in
the definition of $\mathcal{T}$.
In what follows we indicate with $\mathbb{P}_t\quadre{\cdot}$ the $\mathcal{F}_t$-conditional probability of any event and
with $\mathbb{N}_0=\left\{0,1,2,...\right\}$ the set of non-negative integer numbers. The following
is our formal definition of the SHARP model.
\begin{mydef}\label{def:SHARP}
{\it A discrete-time process $\tonde{S_t}_{t\in\mathcal{T}}$ is a SHARP process if
\begin{equation}\label{seasonal_model}
\begin{array}{rll}
%S_t\,|&\,\mathcal{F}_{t-1}\sim \mathcal{P}(\lambda_t);\\
\mathbb{P}_{t-1}\quadre{S_t=k}&=&\lambda_t^k\,\frac{e^{-\lambda_t}}{k!},\quad k\in\mathbb{N}_0,~t\in\mathcal{T},\vspace{0.2cm}\\
\lambda_t&=&\varphi_{j_t} \,\mu_t,\\
\mu_t&=&(1-\Sigma_\alpha) +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1}, \end{array}
\end{equation}
where $\varphi_{j_t}=\varphi_{t - \left \lfloor{t/J}\right \rfloor \cdot J}$ is a periodic and positive deterministic process, $\Sigma_\alpha=\alpha^{\tonde{s}}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}<1$ with $\alpha^{\tonde{s}}>0$, $\alpha^{\tonde{m}}>0$, $\alpha^{\tonde{\ell}}>0$, and where the averages $\widetilde{S}_{t_1:t_2}$, for generic indexes $t_1$ and $t_2$ in $\mathcal{T}$, are defined as
\be\label{eq:har_averages}
\widetilde{S}_{t_1:t_2}\df\frac{1}{t_2-t_1+1}\,\sum_{q=t_1}^{t_2} \frac{S_{q}}{\varphi_{j_q}}.
\ee
}
\end{mydef}
Before proceeding with the discussion on
the estimation of the model in Definition \ref{def:SHARP}, we describe step-by-step
how the set of equations in \eqref{seasonal_model} is
assembled starting from the Autoregressive Conditional Poisson model
(ACP) of \cite{heinen2003modelling}. For this purpose, consider that an ACP-model for the spread $S_t$ would be written as
\begin{equation}\label{eq:ACP}
\begin{array}{rll}
%S_t\,|\,\mathcal{F}_{t-1}&\sim \mathcal{P}(\lambda_t)\\
\mathbb{P}_{t-1}\quadre{S_t=k}& = & \lambda_t^k\,\frac{e^{-\lambda_t}}{k!},\quad k=0,1,2,\hdots,\vspace{0.2cm}\\
\lambda_t & = & c+\alpha(B) S_t +\beta(B) \lambda_t,
\end{array}
\end{equation}
where $c\in\mathbb{R}$ is a constant and where $\alpha(B)$ and $\beta(B)$ are polynomials of the backshift operator%\footnote{The backshift operator
%is defined here in the standard fashion
%$$
%B^qS_t=S_{t-q}
%$$
%for all $q=0,1,2,...,t$ and for all $t\in\mathcal{T}$.
%}
$B$, i.e.
$$
\alpha(B)=\sum_{q=1}^a \alpha_q B^q,\quad \beta(B)=\sum_{q=1}^{b} \beta_q B^q,
$$
with $\alpha_q>0$ for $q=1,...,a$ and $\beta_q>0$ for $q=1,...,b$.
The ACP model is suited
for integer-valued processes; however, it is not designed to capture long-memory and seasonality patterns, such as those
described in Figure \ref{fig:empirical}.
In what follows we discuss separately how these two important features can be included with minimal effort.
\paragraph{Seasonality.} The model in \eqref{eq:ACP} can be easily modified to include
a seasonal deterministic pattern in the dynamics, more precisely to incorporate an intraday pattern for $\E{S_t}$.
Consider for this purpose a model for the bid-ask spread $S_t$ of the form
\begin{equation}\label{simple_seasonal_model}
\begin{array}{rll}
%S_t\,|&\,\mathcal{F}_{t-1}\sim \mathcal{P}(\lambda_t);\\
\mathbb{P}_{t-1}\quadre{S_t=k}&=&\lambda_t^k\,\frac{e^{-\lambda_t}}{k!},\quad k=0,1,2,\hdots,\vspace{0.2cm}\\
\lambda_t&=&\varphi_{j_t} \,\mu_t,\vspace{0.2cm}\\
\mu_t&=&c+\alpha(B) \frac{S_t}{\varphi_{j_t}} +\beta(B) \mu_t,
\end{array}
\end{equation}
where the deterministic (and positive) pattern $\varphi_{j_t}=\varphi_{t - \left \lfloor t/J\right \rfloor \cdot J}$ is left unspecified and %must be estimated.
may be estimated nonparametrically.
We will refer to this specification as sACP$(a,b)$, where $a$ and $b$ are the orders of the two polynomials $\alpha(B)$ and $\beta(B)$, respectively.
The model described by the system of equations \eqref{simple_seasonal_model} features the separation of the intensity $\lambda$ into a seasonal pattern $\varphi$ and an additional stochastic process $\mu$ that plays the role of a de-seasonalized intensity. %By assuming a constant value\footnote{The parametric restrictions, needed to ensure a constant value for the expected value of $\mu_t$, will be derived, for the case of the SHARP model, in Theorem \ref{th:LLN}} for
%$\E{\mu_t}$,
Under the assumption of weak-stationarity\footnote{The parametric restrictions needed to ensure covariance stationarity of the process $\tonde{S_{j_t+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ will be discussed in the proof of Theorem \ref{th:LLN}. This property guarantees that, for any given $j_t$, the expected value of the process $\tonde{S_{j_t+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ does not depend on the day $d$, but only on $j_t$, allowing the incorporation of a seasonal intraday pattern for $\E{S_t}$ with period $J$.} for the process $\tonde{S_{j_t+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ with given $j_t\in\{1,2,...,J\}$, we can set the parameter $c$ (in the last of the equations \eqref{simple_seasonal_model}) equal to
\be\label{eq:ccond}
c=1-\sum_{q=1}^a \alpha_q- \sum_{q=1}^{b} \beta_q,
\ee
in order to satisfy the condition $\mathbb{E}[\mu_t]=1$, which in turn guarantees that\footnote{Straightforward computations, similar to the variance targeting case for GARCH models \citep{engle1995grappling}, show that in order to have \eqref{eq:ESequalPhi} we need the parametric restriction \eqref{eq:ccond}.}
\be\label{eq:ESequalPhi}
\mathbb{E}\left[S_t\right]=\varphi_{j_t},\quad \forall t.
\ee
Hence, by imposing \eqref{eq:ccond}, we are ensuring that $\mu$ describes the deviation of the intensity $\lambda$ from the seasonal pattern $\varphi$.
The intraday pattern $\varphi$ can be estimated in many ways. We postpone to Section \ref{sec:est} the discussion on this topic.
%A natural choice is to take the sample mean of $S_t$ over each day\footnote{
%A valid alternative is considered in the LMACP model by \cite{gross2013predicting}. They consider a Fourier-expansion of the seasonal component
%\begin{equation}\label{Fourier}
%\varphi_{j_{t}}=\delta_0+\delta^\varphi \frac{j_{t}}{J} + \sum_{k=1}^K \left(\delta^\varphi_{1,k} \cos\left(\frac{j_{t}}{J}2\pi k\right) +\delta^\varphi_{2,k} \sin\left(\frac{j_{t}}{J} 2 \pi k\right) \right),
%\end{equation}
%with the parameters $\delta_0$, $\delta^\varphi$, $\delta^\varphi_{1,k}$ and $\delta^\varphi_{2,k}$ estimated via maximum likelihood.}.
%This leads to the aforementioned estimator $\widehat{\varphi}_{j_t}=\frac{1}{D}\sum_{d=0}^{D-1} S_{d\,J+j_t}$, as in equation \eqref{eq:phi_hat}.
%For independent random variables the convergence $\widehat{\varphi}_{j_t}\stackrel{p}{\rightarrow}\mathbb{E}\left[S_{t}\right]$ for $D\rightarrow\infty$ is guaranteed by the law of large number. However this is not our case, since we are dealing with
%dependent observations. Nevertheless the
%results still holds, as proved in Theorem \ref{th:LLN}, and is a direct consequence of
%the weak stationarity of the series $\tonde{S_{d\,J+j}}_{d\in\graffe{1,...,D}}$
%and the summability of its covariances.
\paragraph{Long-memory.} The hybrid model described by the equations \eqref{simple_seasonal_model} features an
intraday seasonal pattern but it cannot fit the long-memory structure of the time-series $S_t$, that is, the slowly decaying autocorrelation
of
%Having dealt with the seasonal pattern, we now have to properly describe the strong persistence of the de-seasonalised process. As before let $\varphi_{j_{t}}$ be defined as\footnote{Note that we are implicitly assuming
%that the unconditional expected value of $S_t$ does not depend on $t$. This property
%is satisfied by the SHARP model with a parametric choice analogous to \eqref{eq:param_condition}.} $\varphi_{j_{t}}=\mathbb{E}\quadre{S_t}$.
the de-seasonalised variate $S_t/\varphi_{j_t}$ (e.g., see Figure \ref{fig:empirical}). Such a long-memory structure can be
incorporated following several modelling strategies, more precisely
by choosing an appropriate dependence of $\mu_t$ on past realizations of the
dependent variable $S_t$.
The adoption of fractionally integrated processes is a valid possibility \citep[see][among others]{baillie1996long,jasiak1999persistence,rossi2014long,bayer2016pricing}.
Nevertheless, empirically, the estimation of these models is often problematic and time-consuming.
To circumvent this complexity, we take inspiration from the popular HAR model by \cite{corsi2009simple}.
The HAR model has the advantage of reproducing slowly-decaying memory patterns indistinguishable from those observed in the data,
while preserving a simple structure. The definitive specification\footnote{In \cite{corsi2009simple}, realized volatility is modeled as a simple AR-type process with different volatility components realized over three different time horizons (daily, weekly and monthly).
In our SHARP model, we adopt a specification with three different time horizons as well.} of the SHARP model is hence achieved by
imposing an HAR structure to the de-seasonalised intensity $\mu_t$ that appears in the model \eqref{simple_seasonal_model}, that is
by substituting the last of the equations in \eqref{simple_seasonal_model} with
\be\label{eq:HARforMu}
\mu_t = c+\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1},
\ee
where the averages $\widetilde{S}_{t-1:t-1}$, $\widetilde{S}_{t-m:t-1}$ and $\widetilde{S}_{t-\ell:t-1}$ are defined in equation \eqref{eq:har_averages}.
The parameter $c$ is a constant, $m$ and $\ell$ are two integers with $m<\ell$ and the parameters $\alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$
correspond to, respectively,
the short ($s$), medium ($m$) and long ($\ell$) autoregressive components for $\mu_t$.
Note that, while the short-term component is
chosen to coincide with $S_{t-1}/\varphi_{j_{t-1}}$, we allow $m$ and $\ell$ to be chosen via an optimization procedure. We will return to this point later.
Finally, in order to guarantee that the HAR structure in equation \eqref{eq:HARforMu} preserves, as for the hybrid model in equations \eqref{simple_seasonal_model}, the identity $\E{\mu_t}=1$, the constant parameter $c$ is to be chosen by imposing the constraint \eqref{eq:ccond}, which gives $c=1-\tonde{\alpha^{\tonde{s}}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}}$, whence the specification $\mu_t=(1-\Sigma_\alpha) +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1}$ with $\Sigma_\alpha=\alpha^{\tonde{s}}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}$ that appears in Definition \ref{def:SHARP}.
\section*{Estimation of the SHARP model}\label{sec:est}
\nota{AE: non c'e bisogno di spiegare come funziona la stima mle, riassumi!! anche per R2 qst sezione e' troppo lunga}
The estimation of the SHARP model poses the problems
of choosing the integer parameters $m$ and $\ell$ that define the auto-regressive
components in the last of equations \eqref{seasonal_model} and of identifying the seasonal
pattern $\varphi$ together with the three parameters $\alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$.
Since, as formally stated below in Theorem \ref{th:LLN}, the intraday pattern $\varphi$ can
be efficiently estimated non-parametrically, we rely on a two-step estimation procedure in
which the first step consists, indeed, in a non-parametric estimate of $\varphi$ while in the second step we use a
maximum-likelihood estimator for $m$, $\ell$,
$\alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$
and $\alpha^{\tonde{\ell}}$. The second step is feasible only if an estimate of $\varphi$
is provided, since the log-likelihood of the SHARP model depends on it.
This two-step procedure has non-trivial consequences on the estimation
of the standard errors of the parameters. We discuss this issue in detail\footnote{We thank one anonymous referee for suggesting the adoption of a robust estimator of the standard errors.} at the end of the section.
%Finally, we discuss in detail how the two-step procedure impacts the
%derivation of the standard errors
%of the estimated parameters.
Finally, we prove that it is possible
to estimate the autoregressive
parameters $\alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$
also via ordinary least squares, a possibility that has the non-negligible advantage of
providing an analytical expression for the estimators.
\paragraph{First step: the estimation of the intraday seasonal pattern.}
A natural estimator of the intraday seasonal pattern $\varphi$ is the intraday sample average defined in equation \eqref{eq:phi_hat}.
This said, its asymptotic consistency is far from being guaranteed for a SHARP process, since the discrete-time stochastic process
$\tonde{S_t}_{t\in\mathcal{T}}$ is a collection of non-stationary dependent random variables. Nevertheless, there is an enormous
literature on weak laws of large numbers for dependent random variables.
%Nevertheless, the (weak) law of large number still holds if some regularity conditions
%are met \citep[see, for example, Proposition 7.5 of][]{hamilton1994time}. We need then a theoretical result that guarantees that
%at least one of these conditions is met. This issue is explicitly addressed in the following theorem, together with a regularity condition of the log-likelihood of the SHARP model.
The theoretical result that follows hinges indeed on one of these contributions, more precisely on Proposition 7.5 of \cite{hamilton1994time}, which
establishes some sufficient regularity conditions needed to verify the weak law of large numbers for a time series of dependent random variables. Finally, notice that our model is complicated by the fact that neither the process $\tonde{S_t}_{t\in\mathcal{T}}$ nor the trend adjusted process $\tonde{S_t/\varphi_{j_t}}_{t\in\mathcal{T}}$ are stationary. For this reason, the law of large numbers does not apply straightforwardly.
\begin{theorem}\label{th:LLN}
Assume that the process $\tonde{S_t}_{t\in\mathcal{T}}$ is a SHARP process. Then, for a given $j_t=t - \left \lfloor{\frac{t}{J}}\right \rfloor \cdot J \in \graffe{1,...,J}$, the parametric restrictions $0<\alpha^{\tonde{s}}<1$, $0<\alpha^{\tonde{m}}<1$, $0<\alpha^{\tonde{\ell}}<1$ and $\Sigma_\alpha=\alpha^{\tonde{s}}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}<1$ are sufficient to guarantee that the time-series $\tonde{S_{j_t+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ is covariance stationary and
\be\label{eq:consistency}
\widehat{\varphi}_{j_t}\df\frac{1}{D}\sum_{d=0}^{D-1} S_{j_t+d\,J}\stackrel{p}{\to} \varphi_{j_t}\textrm{ as }D\to \infty.
\ee
%Besides, the log-likelihood of the model, for given $m$ and $\ell$ is
%\begin{equation}\label{eq:loglik}
%\mathcal{L}\tonde{\alpha^{\tonde{s}},\alpha^{\tonde{m}},\alpha^{\tonde{\ell}}} = \sum_{t\in\mathcal{T} } l_t = \sum_{t\in\mathcal{T} } \left[- \lambda_t-\log(S_t!) + S_t \log(\lambda_t) \right]
%\end{equation}
%where $T=D\cdot J$, is a concave function of the three parameters $\tonde{\alpha^{\tonde{s}},\alpha^{\tonde{m}},\alpha^{\tonde{\ell}}}$.
\end{theorem}
{\it Proof.} See Appendix \ref{app:LLN}.\\
It is worth remarking that Theorem \ref{th:LLN} does not establish the covariance-stationarity for the SHARP process $\tonde{S_t}_{t\in\mathcal{T}}$, but for all the $J$ processes $\tonde{S_{j_t+d\,J}}_{d\in\graffe{0,1,...,D-1}}$, with $j_t=1,...,J$, which is exactly what is needed to get consistency of the estimators $\widehat{\varphi}_{j_t}$. Finally, we notice that the parametric restrictions described in Theorem \ref{th:LLN} are identical to the covariance-stationarity conditions of a standard ACP process. Therefore it is natural to ask whether the result in Theorem \ref{th:LLN} could be derived directly from the properties of the standard ACP process applied to the de-seasonalized process $S_t/\varphi_{j_t}$.
% The latter differs from a SHARP model for the absence of a seasonal pattern $\varphi_{j_t}$.
%Anyhow %, even if we remove the seasonal pattern from a SHARP model, considering
However this is not the case since the latter %the de-seasonalized process $S_t/\varphi_{j_t}$, we do not obtain
does not follow an ACP process\footnote{This can be easily understood from the fact that $S_t/\varphi_{j_t}$ is not an integer-valued process and, therefore, its conditional distribution is not Poisson.}.
Let us emphasize nevertheless that, in finite sample, the use of a kernel smoother is recommended in order to reduce the variance of $\widehat{\varphi}_{j_t}$, as in Figure \ref{fig:empirical} \nota{Aggiungi piu dettagli su come implemento il kernel. La frase inoltre non e' chiara per R2: specificare che si tratta del trade off tra bias e varianza: l'uso del kernel puo' generare bias ma attenua la varianza}. Moreover, if the event $S_t=0$ is frequent, it is often necessary to substitute \eqref{eq:consistency} with a robust alternative $\widehat{\varphi}^{(0)}_{j_t}=\max\{\widehat{\varphi}_{j_t},\,\varphi^{\text{min}}\}$, with $\varphi^{\text{min}}>0$, in order to avoid zeros in the intraday pattern\footnote{In the empirical analysis of Section \ref{sec:Empirical}, we adopt $\varphi^{\text{min}}=0.1$; however, out-of-sample results are largely independent of the choice of $\varphi^{\text{min}}$.}, which could originate divergences in $\widetilde{S}_{t_1:t_2}$ of equation \eqref{eq:har_averages}.
\paragraph{Second step: the estimation of $m$, $\ell$, $\alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$.} \nota{in qst paragrafo cerchiamo di accorciare! per il R2}
After having estimated the intraday pattern, we can proceed with the identification of the remaining parameters.
These are estimated by maximizing the log-likelihood of the SHARP process, which is written as
%\ba\label{eq:ll}
%\log \mathcal{L}(\boldsymbol{\vartheta})&=&\sum_{t\in\mathcal{T} } -\widehat{\varphi}_{j_t} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1} \right) -\log (S_t !) \accapo
%& +&S_t\, \log\left[\widehat{\varphi}_{j_t} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1} \right) \right]
%\ea
%where
\small{
\ba\label{eq:ll}
\hspace*{-0.2cm}
\log \mathcal{L}(\boldsymbol{\vartheta})&=&\sum_{t\in\mathcal{T} } l_t\accapo
&=& \sum_{t\in\mathcal{T} }\Bigg\{ -\log (S_t !) -\widehat{\varphi}_{j_t} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1} \right) +\accapo
& &+S_t\, \log\left[\widehat{\varphi}_{j_t} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1} \right) \right] \Bigg\}
\ea}
where $l_t$ is the logarithm of the conditional probability of observing $S_t$, and
\be\label{eq:theta}
\boldsymbol{\vartheta}=(m, \ell, \alpha^{\tonde{s}},\alpha^{\tonde{m}},\alpha^{\tonde{\ell}})'
\ee
is the vector of the parameters of the model. When the integers $m$ and $\ell$ in the log-likelihood \eqref{eq:ll}
are considered as free parameters to be estimated, $\lambda_t$ is no longer an affine function of the parameters. As a
consequence, the log-likelihood is not concave in $\boldsymbol{\vartheta}$. %The absence of concavity in the log-likelihood and the discreteness of the parameters do not guarantee the consistency of the maximum likelihood estimators. For this reason, in Appendix \ref{sec:finite_sample_properties} we study their finite sample properties. In the same section we also analyze the finite sample properties of our main competitor, the LMACP model, which is briefly described in Appendix \ref{sec:Axel}. Here we limit to say that the finite sample performance of the MLE estimator is satisfactory for sample size
%comparable with those used in empirical applications, e.g. when dealing with data sampled at one-minute frequency.
On the contrary, if one selects a priori the parameters $m$ and $\ell$, such as in \cite{corsi2009simple}, the estimation procedure is highly simplified by the concavity of the log-likelihood \citep[for a discussion on this topic, we refer to the book by][]{boyd2004convex}.
In the empirical analysis presented in Section \ref{sec:Empirical}, we derive a set of recommended values for $m$ and $\ell$ for different frequencies of observation. These values can be used by researchers to simplify and improve (by exploiting the
concavity of the log-likelihood) the estimation of the SHARP model\footnote{Choosing an a-priori value for $m$ and $\ell$
carries the non-negligible advantage that, in this case,
the SHARP model is also robust to the misspecification of the Poisson distribution. This can be seen by
looking at the score vector, whose component $f$, with $f\in\graffe{s,m,\ell}$,
\be\label{eq:score}
\frac{\partial \log \mathcal{L}(\boldsymbol{\vartheta}) }{\partial \alpha^{(f)}} =
\sum_{t\in\mathcal{T} } \frac{\partial \lambda_t}{\partial \alpha^{(f)}} \left( \frac{S_t} {\lambda_t}-1\right)
\ee
has expected value equal to zero as long as the intensity $\lambda_t$ is correctly specified (that is, if $\mathbb{E}[S_t|\mathcal{F}_{t-1}]=\lambda_t$, s.t. $\mathbb{E}[\frac{\partial \log \mathcal{L}(\vartheta) }{\partial \alpha^{(f)}}|\mathcal{F}_{t-1}]=0$).
Therefore, the maximum likelihood estimation with the
Poisson distribution can be
considered a Quasi-Maximum Likelihood estimator.}. In the empirical analysis presented, we always adopt the
recommended values as well.
\nota{Accorcia la parte teorica! e metti in appendice i dettagli}
\paragraph{Standard errors and model misspecification.} Assuming to have selected
a priori the integers $m$ and $\ell$, the two-step procedure described in Section \ref{sec:est} returns
the estimates
$$
\tonde{\widehat{\boldsymbol{\varphi}},\widehat{\boldsymbol{\vartheta}}}=\tonde{\widehat{\varphi}_1,\hdots,\widehat{\varphi}_{J},\widehat{\alpha}^{\tonde{s}},\widehat{\alpha}^{\tonde{m}},\widehat{\alpha}^{\tonde{\ell}}}
$$
where $\widehat{\varphi}_{j_t}$ for $j_t=1,...,J$ are obtained through the sample average estimator defined in equation \eqref{eq:phi_hat} and
the remaining parameters are estimated by maximizing the (concave) log-likelihood in \eqref{eq:ll} with respect to
the parameters $ \alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$.
The main issue in deriving standard errors for the vector of estimated
parameters $\tonde{\widehat{\boldsymbol{\varphi}},\widehat{\boldsymbol{\vartheta}}}$
lies in the fact that the model can be misspecified, and then a robust estimator of the standard errors
must be employed.
%In all the empirical applications that follow, we compute
%standard errors with
%a sandwich estimator, which is not affected by
%the misspecification of the
%conditional distribution\footnote{We thank two anonymous referees for this suggestion.}.
%This guarantees the reliability of the statistical inference.
The calculation of the standard errors could be approached via the standard sandwich estimator. In this case, a notable simplification comes from the fact
that the first and the second derivatives of the
log-likelihood as a function
of the parameters $ \alpha^{\tonde{s}}$, $\alpha^{\tonde{m}}$ and $\alpha^{\tonde{\ell}}$
are analytically tractable\footnote{Besides, the score vector and the Hessian matrix can be used to simplify the numerical optimization of the likelihood. Indeed, optimization algorithms are usually faster and more robust with the inclusion of the derivatives.}.
Indeed, for every couple of indexes $f$ and $l$, both chosen in $\graffe{s,m,\ell}$, the corresponding element of the Hessian matrix is computed as
\be\label{eq:hess}
-\frac{\partial^2 \log \mathcal{L}(\boldsymbol{\vartheta}) }{\partial \alpha^{(f)} \partial \alpha^{(l)}}=\sum_{t\in\mathcal{T} } \frac{S_t}{\lambda_t^2} \frac{\partial \lambda_t}{\partial \alpha^{(f)}} \frac{\partial \lambda_t}{\partial \alpha^{(l)}},
\ee
with
$$
\frac{\partial \lambda_t}{\partial \alpha^{(f)}}= \varphi_{j_t} \left(-1+ \widetilde{S}_{t-f:t-1} \right).
$$
This said, the use of a sandwich estimator does not take into account the effect
of the two-step estimation procedure on the computation of the standard errors.
To circumvent this problem, we follow the approach of \cite{cipollini2017copula} rephrasing
the two-step procedure as a two-step GMM estimator, for which robust standard errors are known \citep{newey1994large}.
For our case,
both the sample average $\widehat{\varphi}_{j_t} $ and the maximum likelihood
estimator used for the remaining parameters,
can be seen as special cases of GMM estimators. %Hence we follow \cite{newey1994large}
%\nota{nella cartella replay-to-AE/bibliografy, da pagina 2175}
% and \cite{cipollini2017copula}
%\nota{stessa cartella, sezione 3.2.1}
Hence, by adapting the approach of \cite{cipollini2017copula},
we get for the asymptotic variance-covariance
matrix ($\text{aVar}$) of $\sqrt{T} (\widehat{\boldsymbol{\vartheta}} - \boldsymbol{\vartheta})$
the expression
\begin{equation}\label{eq:robust_se}
\text{aVar}[\sqrt{T} (\widehat{\boldsymbol{\vartheta}}- \boldsymbol{\vartheta})]=
\left(
H_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}^{-1},
H_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}^{-1} \, H_{\boldsymbol{\vartheta}\boldsymbol{\varphi}}
\right) \,
\left(\begin{array}{cc}
\Omega_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} & \Omega_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} \\[5pt]
\Omega_{\boldsymbol{\varphi}\boldsymbol{\vartheta}} & \Omega_{\boldsymbol{\varphi}\boldsymbol{\varphi}} \\[5pt]
\end{array} \right) \,
\left(
H_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}^{-1},
H_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}^{-1} \, H_{\boldsymbol{\vartheta}\boldsymbol{\varphi}}
\right)',
\end{equation}
with\footnote{The operators $\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}$ and $\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\varphi}}$ appearing in equation \eqref{eq:definitions_se} are defined as $(\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}})_{h,k}=\partial^2/(\partial \vartheta_h \partial \vartheta_k )$ and $(\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\varphi}})_{h,k}=\partial^2/(\partial \vartheta_h \partial \varphi_k )$.}
\begin{equation}\label{eq:definitions_se}
H_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} = \mathbb{E}[\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} l_t],\quad H_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} = \mathbb{E}[\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} l_t],\quad
\Omega = \text{Var}[(\nabla_{\boldsymbol{\vartheta}} l_t', \mathbf{m}_{\lfloor t/J \rfloor}') ],
\end{equation}
where $l_t$ is defined in equation \eqref{eq:ll}. %and where %$\mathbf{m}_{d_t} = \left( S_{d_t J +1} - \varphi_{1}, S_{d_t J +2} - \varphi_{2}, ..., S_{d_t J +J} - \varphi_{j_t} \right)'$,
%$d(t) = \lfloor t/J \rfloor$.
The vector $\mathbf{m}$ is the moment (vector) function giving $\widehat{\varphi}_{j_t}$ as a GMM estimator of $\varphi_{j_t}$, defined as
$$
\mathbf{m} = \sum_{d =0}^{D-1} \mathbf{m}_d = \sum_{d =0}^{D-1} \left( S_{d J +1} - \varphi_{1}, S_{d J +2} - \varphi_{2}, ..., S_{d J +J} - \varphi_{J} \right)'.
$$
The matrix $\Omega$ denotes the variance-covariance matrix of the two moment functions $\nabla_{\boldsymbol{\vartheta}} l_t$ and $\mathbf{m}_{\lfloor t/J \rfloor}$.
All the matrices
that appear on the right hand side of
equation \eqref{eq:robust_se} can be estimated by their corresponding sample counterparts, which are
%$$
%\begin{array}{lrlrl}
%\left(\widehat{\Omega}_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} \right)_{h,k} & =& \frac{1}{D} \sum_{d=0}^{D-1} (\nabla_{\boldsymbol{\vartheta}} l_{d J+k})_h \,\left( \mathbf{m}_d\right)_{k} &=& \frac{1}{D} \sum_{d=0}^{D-1} (\nabla_{\boldsymbol{\vartheta}} l_{d J+k})_h \,( S_{d J +k} - \widehat{\varphi}_{k} ),\\[0.3cm]
%\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\vartheta}}\right)_{h,k} &=& \frac{1}{D} \sum_{d=0}^{D-1} (\mathbf{m}_{d})_h \, (\nabla_{\boldsymbol{\vartheta}} l_{d J +h})_k &=& \frac{1}{D} \sum_{d=0}^{D-1} ( S_{d J +h} - \widehat{\varphi}_{h} )\,(\nabla_{\boldsymbol{\vartheta}} l_{d J+h})_k,\\[0.3cm]
%\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\varphi}}\right)_{h,k} &=& \frac{1}{D} \sum_{d=0}^{D-1} (\mathbf{m}_{d})_h (\mathbf{m}_{d})_k&=&\frac{1}{D} \sum_{d=0}^{D-1} ( S_{d J +h} - \widehat{\varphi}_{h} )\,( S_{d J +k} - \widehat{\varphi}_{k} ),
%\end{array}
%$$
$$
\left(\widehat{\Omega}_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} \right)_{h,k} = \frac{1}{D} \sum_{d=0}^{D-1} (\nabla_{\boldsymbol{\vartheta}} l_{d J+k})_h \,\left( \mathbf{m}_d\right)_{k} = \frac{1}{D} \sum_{d=0}^{D-1} (\nabla_{\boldsymbol{\vartheta}} l_{d J+k})_h \,( S_{d J +k} - \widehat{\varphi}_{k} ),
$$
with $h \in \{1,2,3\}$ and $k \in \{1,...,J\}$,
$$
\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\varphi}}\right)_{h,k} = \frac{1}{D} \sum_{d=0}^{D-1} (\mathbf{m}_{d})_h (\mathbf{m}_{d})_k=\frac{1}{D} \sum_{d=0}^{D-1} ( S_{d J +h} - \widehat{\varphi}_{h} )\,( S_{d J +k} - \widehat{\varphi}_{k} ),
$$
with both $h$ and $k$ integers in $\graffe{1,...,J}$, and\footnote{Notice that $\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\vartheta}}\right) = \left(\widehat{\Omega}_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} \right)'$.} (remember that $T=J\cdot D$)
$$
\widehat{H}_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} = \frac{1}{T} \sum_{t\in\mathcal{T}} (\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} l_t), \quad
\widehat{H}_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} = \frac{1}{T} \sum_{t\in\mathcal{T}} (\nabla^2_{\boldsymbol{\vartheta}\boldsymbol{\varphi}} l_t), \quad
\widehat{\Omega}_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}} = \frac{1}{T} \sum_{t\in\mathcal{T}} (\nabla_{\boldsymbol{\vartheta}} l_t) (\nabla_{\boldsymbol{\vartheta}} l_t)',
$$
%\nota{Dubbio: e' giusto utilizzare i giorni d come indice di sommatoria? perche' di solito si usa tutto il sample! cioe' si somma su $t\in \mathcal{T}$. Inoltre in $\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\varphi}'}\right)_{i,j}$ sto calcolando la covarianza tra eventi non contemporanei: $ ( S_{d J +i} - \widehat{\varphi_{i}} )$ e $( S_{d J +j} - \widehat{\varphi_{j_t}} )$. Non so se e' corretto! forse dovrei assumere che $\left(\widehat{\Omega}_{\boldsymbol{\varphi}\boldsymbol{\varphi}'}\right)_{i,j}$ e' diagonale?}
where we recognize the Hessian of the log-likelihood \eqref{eq:hess} in $\widehat{H}_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}$ and the outer-product $(\nabla_{\boldsymbol{\vartheta}} l_t) (\nabla_{\boldsymbol{\vartheta}} l_t)'$ of the gradient in $\widehat{\Omega}_{\boldsymbol{\vartheta}\boldsymbol{\vartheta}}$.
Concerning the standard errors of the intraday pattern, the asymptotic variance-covariance matrix of $\sqrt{T} (\widehat{\boldsymbol{\varphi}}_T - \boldsymbol{\varphi})$ is equal to $\Omega_{\boldsymbol{\varphi}\boldsymbol{\varphi}}$, with $\boldsymbol{\varphi}=( \varphi_1, \varphi_2, ..., \varphi_J)$.
\paragraph{OLS estimation of the SHARP.}
One of the main advantages of the HAR model of \cite{corsi2009simple} is that it can be
analytically estimated via ordinary least squares. Here we prove that the SHARP model shares this feature\footnote{We thank an anonymous referee for this suggestion.}.
First, we rewrite the second of the equations in \eqref{seasonal_model} as\footnote{For the sake of simplicity, we recall here that the second line of equation \eqref{seasonal_model} is $\lambda_t=\varphi_{j_t} \mu_t$.}
\begin{equation}\label{eq:HAR_1}
\widetilde{S}_{t:t} = \mu_t + \varepsilon_t,
\end{equation}
where the martingale difference sequence $\varepsilon_t$ is defined as $\varepsilon_t\df (S_{t}-\lambda_t)/\varphi_{j_t}$. Then, substituting the explicit expression of $\mu_t$ from equation \eqref{seasonal_model} into equation \eqref{eq:HAR_1}, we obtain an HAR-like structure in $\widetilde{S}_{t:t}-1$
\begin{equation}\label{eq:S_HAR_2}
(\widetilde{S}_{t:t}-1) = \alpha^{\tonde{s}}\,(\widetilde{S}_{t-1:t-1}-1)+ \alpha^{\tonde{m}}\,(\widetilde{S}_{t-m:t-1}-1) +\alpha^{\tonde{\ell}}\,(\widetilde{S}_{t-\ell:t-1}-1) + \varepsilon_t.
\end{equation}
In equation \eqref{eq:S_HAR_2}, the parameters $\boldsymbol{\vartheta}=(\alpha^{(s)},\alpha^{(m)},\alpha^{(\ell)})'$ can now be estimated via OLS.
The error term $\varepsilon_t$ is neither Gaussian nor homoscedastic; however, robust standard errors can be calculated through equations \eqref{eq:robust_se} and \eqref{eq:definitions_se}, with $l_t = (\varepsilon_t)^2$, after having estimated the intraday seasonal pattern as in \eqref{eq:consistency}. We will refer to this specification of the SHARP model as olsSHARP.
\section*{MIDAS-SHARP}\label{sec:MIDAS-SHARP}
\nota{R2: Midas-SHARP. Why not introducing the model more simply as SHARP
with additional covariance-stationary regressors?. direi di sottolineare qst punto, ma senza togliere il MIDAS-SHARP}
The SHARP modelling described so far is sub-optimal
from an informational point of view. \nota{cambiare la frase: suboptimal non e' la parola adeguata, secondo R2} To clarify this assertion,
consider that only the realizations of the spread
sampled on the equispaced time grid $\mathcal{T}$
enter in the estimation and, hence,
constitute the information set used
in every forecasting exercise.
As a consequence the spread
history between any two consecutive
instants of the grid $\mathcal{T}$ is ignored, bringing unavoidably
a loss of information.
%since a fraction of spread values are not included in the model.
%Consider, as an example, Figure \ref{fig:one_minute_spread},
%where a fraction of the continuous time dynamics of the spread is shown.
%It is evident that a large part of the dynamics is
%discarded when only one-minute data are considered.
This drawback can be alleviated by exploiting
the informational
content of the spread observed on
some time partition finer than the original partition $\mathcal{T}$,
a modelling strategy that derives directly from the mixed-data sampling (MIDAS) of \cite{ghysels2004midas,ghysels2007midas}.
%\dav{ da qui ===$>$}
%Consider, for example, the modelling of liquidity adjusted Value-at-Risk,
%which attempts to forecast likely future losses accounting also for the effects of the transaction costs given by the bid-ask spread. The horizon of interest is usually some minutes (e.g., 5 minutes in \cite{weiss2013forecasting}), whereas the bid-ask spread is available at a tick-by-tick base\footnote{
%The same time-horizon is used for other high-frequency risk measures which do not take into account the liquidity risk, such as the intraday VaR of \cite{giot2005market} and \cite{dionne2009intraday}. However, \cite{weiss2013forecasting} shows that liquidity risk has to be taken account in high frequency risk modelling if a correct estimation of losses is desired.}.
%\dav{ $<$=== a qui non capisco cosa vogliamo dire. La narrazione \'e parecchio confusa.} \nota{LUCA: volevo fare un esempio di utilizzo del forecast dello spread in cui la frequenza di interesse e' qualche minuto ma lo spread e' available ad una frequenza molto piu alta. L'esempio e' quello delle misure di riscio ad alta frequenza: liquidity-adjusted intraday VaR.}
Suppose that the spread $S_t$ has been observed for $t\in\mathcal{T}$ (say, every minute). Divide
now the interval between any two consecutive instants of $\mathcal{T}$
into $r$ equi-spaced sub-intervals. This will return the finer partition
\be\label{eq:Qr}
\mathcal{Q}^{\tonde{r}}=\Bigg\{\frac{1}{r},\frac{2}{r},\hdots,D\cdot J-\frac{1}{r},D\cdot J\Bigg\}.
\ee
We indicate with $S_{q}^{\tonde{r}}$ with $q\in\mathcal{Q}^{\tonde{r}}$ the spread prevailing at time $q$.
For given $m$ and $\ell$, multiples of $1/r$, the SHARP model in Definition \ref{def:SHARP} is extendible into a MIDAS-like framework
by specifying a dynamics for the instantaneous intensity $\mu_t$ on the coarser partition $\mathcal{T}\subset\mathcal{Q}^{\tonde{r}}$.
We name this extension MIDAS-SHARP (henceforth shortened in mSHARP) and we formalize its definition in what follows.
\begin{mydef}\label{def:mSHARP}
{\it Let $\mathcal{T}$ and $\mathcal{Q}^{\tonde{r}}$ be, respectively, the partitions defined in equations \eqref{eq:tinset} and \eqref{eq:Qr}. A discrete-time process $\tonde{S_t^{\tonde{r}}}_{t\in\mathcal{T}}$ is a MIDAS-SHARP process if
\begin{equation}\label{midas_model}
\begin{array}{rll}
\mathbb{P}_{t-1}\quadre{S_t^{\tonde{r}}=k}&=&\lambda_t^k\,\frac{e^{-\lambda_t}}{k!},\quad k=0,1,2,\hdots,\quad t\in\mathcal{T}\\
\lambda_t&=&\varphi_{j_t}^{\tonde{r}} \,\mu_t,\\
\mu_t&=& (1-\Sigma_{\alpha}) +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}^{(r)}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1}^{(r)} +\alpha^{\tonde{\ell}} \widetilde{S}_{t-\ell:t-1}^{(r)},
\end{array}
\end{equation}
where $\Sigma_{\alpha}=\alpha^{\tonde{s}}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}<1$, with $\alpha^{\tonde{s}}>0$, $\alpha^{\tonde{m}}>0$, $\alpha^{\tonde{\ell}}>0$ and $\varphi_{j_t}^{(r)}=\varphi_{t - \left \lfloor{t/J}\right \rfloor \cdot J}^{(r)}$ is a periodic and positive deterministic process. The averages $\widetilde{S}_{t_1:t_2}^{(r)}$ are defined as
$$
\widetilde{S}_{t_1:t_2}^{(r)}= \frac{1}{(t_2-t_1) \,r+1}\,\sum_{q\in\{t_1, t_1+\frac{1}{r},..., t_2\}} \frac{S_{q}^{(r)}}{\varphi_{j_q}^{\tonde{r}}},
$$
%and the \nota{intra-interval?} spreads $S^{\tonde{r}}_{q}$ with $q\in\mathcal{Q}^{\tonde{r}}$ \nota{$q\in\mathcal{Q}^{\tonde{r}}\setminus\mathcal{T}$} are
%random variables such that \\ \\
and, for a given $q\in\mathcal{Q}^{\tonde{r}}\setminus\mathcal{T}$, the time-series $\tonde{S^{\tonde{r}}_{j_q+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ is non-negative and covariance stationary, with
\be\label{eq:assumption-definition}
\frac{1}{D}\sum_{d=0}^{D-1}S^{\tonde{r}}_{j_q+d\,J}\stackrel{p}{\ra}\varphi_{j_q}^{\tonde{r}}=\E{S_{q}^{(r)}}<\infty.
\ee
}
\end{mydef}
Note that there is a peculiar difference between the SHARP and the mSHARP. Both models specify the dynamics of the spread on
the coarser time grid $\mathcal{T}$, nevertheless the mSHARP requires additional assumptions on the regularity of the random variables $S_{q}^{(r)}$ defined on the %\nota{finer}
%partition %$\mathcal{Q}^{\tonde{r}}$
set $\mathcal{Q}^{\tonde{r}}\setminus\mathcal{T}$ %\nota{and}
whose dynamics is left unspecified. For these variables, we assume some regularity conditions
in order to guarantee that the results of Theorem \ref{th:LLN} are still valid in this setting.
Indeed, the process in Definition \ref{def:mSHARP} can be seen as a
simple SHARP with additional covariance-stationary regressors. Therefore, the statement and the proof
of Theorem \ref{th:LLN} can be straightforwardly extended to this case.
In particular, it is important to note that the assumption in equation \eqref{eq:assumption-definition} has a double implication. First, it
implicitly imposes that the unconditional expected value $ \E{S_{q}^{(r)}}$ is finite and periodic, with period $J$, in fact
$$
\E{S_{q+J}^{(r)}} = \varphi_{j_{q+J}}^{\tonde{r}} = \varphi_{j_q}^{\tonde{r}} = \E{S_{q}^{(r)}}.
$$
Second, it is needed to
guarantee the consistency of the estimator $\widehat{\varphi}_{j_t}$ %in \eqref{eq:phi_hat} %from the time grid $\mathcal{T}$
%(where is guaranteed by Theorem \ref{th:LLN})
in the set $\mathcal{Q}^{\tonde{r}}\setminus\mathcal{T}$. In all the empirical applications that follow, the mSHARP is
estimated following the same two-step procedure adopted for the SHARP. In the first step, the intraday pattern
of the spreads $S^{\tonde{r}}_{q}$, with $q\in\mathcal{Q}^{\tonde{r}}$, is estimated through the estimator defined in \eqref{eq:assumption-definition}. In the second step, the parameters $\alpha^{\tonde{s}}, \alpha^{\tonde{m}}, \alpha^{\tonde{\ell}}$ are estimated maximising the log-likelihood
\ba
\hspace*{-0.75cm}
\log \mathcal{L}(\boldsymbol{\vartheta})&=&\sum_{t\in\mathcal{T} }\Big\{ -\log (S_t !) +\accapo
& -& \widehat{\varphi}_{j_t}^{(r)} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}^{(r)}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1}^{(r)} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1}^{(r)} \right)+\accapo
&+& S_t\, \log\left[\widehat{\varphi}_{j_t}^{(r)} \left( 1-\Sigma_\alpha +\alpha^{\tonde{s}}\,\widetilde{S}_{t-1:t-1}^{(r)}+ \alpha^{\tonde{m}}\,\widetilde{S}_{t-m:t-1}^{(r)} +\alpha^{\tonde{\ell}}\,\widetilde{S}_{t-\ell:t-1}^{(r)} \right) \right]\Big\}.
\ea
Consistent standard errors for the vector of parameters
$$
(\boldsymbol{\varphi}^{(r)},\boldsymbol{\vartheta})=(\varphi_{1/r}^{(r)}, \varphi_{2/r}^{(r)}, ..., \varphi_{J-1/r}^{(r)},\varphi_{J}^{(r)},\boldsymbol{\vartheta})
$$
can be estimated as in equation \eqref{eq:robust_se}. Finally, note that, for given $m$ and $\ell$, the mSHARP is robust to misspecifications in the Poisson conditional distribution, as it happens for the SHARP.
%The use of a mixed frequency approach could be important also for optimal execution strategies (see Section 5.3) where the optimal trading instant is selected comparing the spread inside a wide interval of several minutes (see \cite{taylor2002economic,gross2013predicting}).
\section*{Empirical Analysis} \label{sec:Empirical}
We compare the forecasting performances of the newly defined
SHARP, olsSHARP and mSHARP
models\footnote{All routines for estimating the SHARP models
are freely available in the form of a Matlab package.
This version of the paper is a blinded version so
any reference to authors and routines has been
temporarily removed.} with a set of benchmark alternatives along three dimensions: accuracy \citep[through the standard test by][]{diebold1995comparing},
misspecification (through the Ljung-Box test statistics) and the average machine time required to perform the estimation.
The following is the list (with some additional information) of the models
involved in the horse-race exercises discussed below.
\begin{enumerate}
\item The SHARP model of equation \eqref{seasonal_model}.
\item The olsSHARP model of equation \eqref{eq:S_HAR_2} (estimated with OLS).
\item The MIDAS-SHARP model of Section \ref{sec:MIDAS-SHARP}.
\item The Long Memory ACP (LMACP) model of \cite{gross2013predicting}. This is a natural benchmark\footnote{Actually, there exist two versions of the LMACP, in this paper we will refer always to that with superior forecasting accuracy. For more details we refer to the original paper.} since it
has already been proved to
possess superior forecasting abilities for the spread
with respect to simple moving average models and Multiplicative Error Models.
Besides, to the best of our knowledge, it is the unique model
in the literature that features simultaneously seasonality, discreteness (specifying the conditional distribution with a double Poisson distribution) and long memory. The LMACP is defined through the set of equations
\begin{equation*}
\begin{array}{rll}
\mathbb{P}_{t-1}\quadre{S_t=k}&=&c(\gamma,\lambda_t^{\prime}) \sqrt{\gamma}\, e^{-\gamma \lambda_t^{\prime}} \left(\frac{e^{-k}k^k}{k!} \right) \left(\frac{\lambda_t^{\prime}\,e}{k} \right)^{\gamma\,k},\quad k=0,1,2,\hdots,\\
\lambda_t^{\prime} &=& \lambda_t\,\exp(s_{j_t}),\\
s_{j_t}&=&\delta^s \frac{j_t}{J} + \sum_{l=1}^L \left(\delta^s_{1,l} \cos\left(\frac{j_t}{J} 2 \pi l\right) +\delta^s_{2,l} \sin\left(\frac{j_t}{J} 2 \pi l\right) \right),\\
\lambda_t &=& \omega + (\varphi-\beta) S_{t-1} + \beta \lambda_{t-1} - \sum_{g=1}^\infty \frac{\Gamma(g-d)}{\Gamma(-d) \Gamma(g+1)} ( S_{t-g}-\varphi S_{t-g-1} ),
\end{array}
\end{equation*}
where $\omega\in \mathbb{R}_0^+$, $0<d<1/2$ and $0\leq\beta<\varphi<1$. \nota{dobbiamo tirare fuori la letteratura sui processi a valori interi, che loss function usano?}
where the round function is used in order to obtain a
forecast coherent for a discrete-valued
process\footnote{As an alternative, one could use the conditional median, as proposed by \cite{freeland2004forecasting}.}. Clearly, the forecast in \eqref{eq:h_forecast} is model-dependent. Besides, the total number of forecasts varies, according to the frequency used,
from $T=\tonde{244-10}\times 390=91\,260$ for the one-minute frequency to $T=\tonde{122-5}\times 4\,680=547\,560$ for the five-second case.
\subsection*{Comparative analysis of the models' performances.} To evaluate the accuracy of the forecasting models,
we use the mean absolute error (MAE)
and mean squared error (MSE) loss functions, defined respectively as
\be\label{eq:LOSS}
\text{MAE} = \frac{1}{T}\sum_{t=1}^{T}\abs{S_{t+1}-\widehat{S}_{t+1|t}},\quad \text{MSE} = \frac{1}{T}\sum_{t=1}^{T}\tonde{S_{t+1}-\widehat{S}_{t+1|t}}^2,
\ee
where the sums are computed across the forecasts in 2014
and where $S_{t+1}$ denotes the observed spread.
Table \ref{table:forecasting_1_min} and Table \ref{table:forecasting_5_sec} report, for each stock in the dataset,
the loss functions for all the seven models and for the
one-minute and five-second frequencies, respectively.
We also add the results of the one-sided \cite{diebold1995comparing} test comparing
the loss functions \eqref{eq:LOSS} of each model with that of
the mSHARP, which turns out to be almost always the smallest. One, two or three stars signal that the mSHARP predictions are significantly better at, respectively, $10\%$, $1\%$ and $0.1\%$ significance level\footnote{Since the loss functions are characterized by heteroskedasticity, we employ, for the denominator of the \cite{diebold1995comparing} test statistics, the HAC estimator of \cite{newey1986simple}, with a Parzen kernel and a bandwidth estimated with the OLS procedure \citep{andrews1991heteroskedasticity}.
We thank an anonymous referee for this suggestion.}.
The results reported in Table \ref{table:forecasting_1_min}
and Table \ref{table:forecasting_5_sec} deliver a clear message: the mSHARP is the preferred model
in terms of accuracy. Results are strongly significant thanks to the large number of observations involved in the forecasting exercise ($T\sim 10^{5}$ for the one-minute frequency and $T\sim 5\times 10^{5}$ for the five-second one).
However, a distinction is needed
between small tick stocks (such as IBM) and large tick ones (such as BAC). For the latter, the dynamics
of the bid-ask spread is typically quite trivial, and the simple seasonal model
provides forecasts comparable to other more sophisticated approaches.
\begin{table}[]
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccccccc}
\multicolumn{8}{c}{\textbf{Out-of-sample accuracy (one minute).}}\\
\hline
\multicolumn{8}{c}{ }\\
&mSHARP &SHARP&olsSHARP &LMACP &sACP &seasonal\, &RW\, \\
%& & \\
\cline{2-8}\\
%& & \\
\multirow{2}{4em}{BAC}
&{ $0.013$}&{ $0.013$}&{ $0.013$}&{ $0.026^{***}$}&{ $0.015^{***}$}&{ $0.013^{*}$}&{ $0.025^{***}$}\\
&{ $0.013$}&{ $0.013$}&{ $0.013$}&{ $0.031^{***}$}&{ $0.015^{***}$}&{ $0.013^{*}$}&{ $0.025^{***}$}\\ & \\
\multirow{2}{4em}{VZ}
&{ $0.108$}&{ $0.108$}&{ $0.109^{**}$}&{ $0.135^{***}$}&{ $0.109^{**}$}&{ $0.113^{***}$}&{ $0.164^{***}$}\\
&{ $0.125$}&{ $0.125$}&{ $0.127^{*}$}&{ $0.277^{***}$}&{ $0.126$}&{ $0.127$}&{ $0.187^{***}$}\\ & \\
\multirow{2}{4em}{GM}
&{ $0.116$}&{ $0.118^{***}$}&{ $0.118^{***}$}&{ $0.134^{***}$}&{ $0.118^{***}$}&{ $0.123^{***}$}&{ $0.183^{***}$}\\
&{ $0.133$}&{ $0.134^{*}$}&{ $0.135^{***}$}&{ $0.171^{***}$}&{ $0.134^{*}$}&{ $0.136^{**}$}&{ $0.206^{***}$}\\ & \\
\multirow{2}{4em}{DAL}
&{ $0.269$}&{ $0.275^{***}$}&{ $0.276^{***}$}&{ $0.302^{***}$}&{ $0.275^{***}$}&{ $0.290^{***}$}&{ $0.362^{***}$}\\
&{ $0.332$}&{ $0.340^{***}$}&{ $0.344^{***}$}&{ $0.405^{***}$}&{ $0.341^{**}$}&{ $0.363^{***}$}&{ $0.454^{***}$}\\ & \\
\multirow{2}{4em}{HAL}
&{ $0.419$}&{ $0.429^{***}$}&{ $0.431^{***}$}&{ $0.465^{***}$}&{ $0.434^{***}$}&{ $0.483^{***}$}&{ $0.531^{***}$}\\
&{ $0.607$}&{ $0.628^{***}$}&{ $0.658^{**}$}&{ $0.751^{***}$}&{ $0.622^{*}$}&{ $0.695^{***}$}&{ $0.838^{***}$}\\ & \\
\multirow{2}{4em}{XOM}
&{ $0.507$}&{ $0.522^{***}$}&{ $0.522^{***}$}&{ $0.568^{***}$}&{ $0.532^{***}$}&{ $0.589^{***}$}&{ $0.624^{***}$}\\
&{ $0.888$}&{ $0.922^{***}$}&{ $0.929^{**}$}&{ $1.140^{***}$}&{ $0.924^{**}$}&{ $1.021^{***}$}&{ $1.224^{***}$}\\ & \\
\multirow{2}{4em}{VLO}
&{ $0.665$}&{ $0.680^{***}$}&{ $0.681^{***}$}&{ $0.699^{***}$}&{ $0.684^{***}$}&{ $0.742^{***}$}&{ $0.826^{***}$}\\
&{ $1.181$}&{ $1.218^{***}$}&{ $1.251^{***}$}&{ $1.316^{***}$}&{ $1.215^{**}$}&{ $1.312^{***}$}&{ $1.758^{***}$}\\ & \\
\multirow{2}{4em}{CVX}
&{ $0.781$}&{ $0.802^{***}$}&{ $0.804^{***}$}&{ $0.823^{***}$}&{ $0.820^{***}$}&{ $0.904^{***}$}&{ $0.996^{***}$}\\
&{ $1.833$}&{ $1.919^{***}$}&{ $1.955^{***}$}&{ $2.065^{***}$}&{ $1.948^{***}$}&{ $2.164^{***}$}&{ $2.811^{***}$}\\ & \\
\multirow{2}{4em}{APC}
&{ $1.459$}&{ $1.477^{***}$}&{ $1.476^{***}$}&{ $1.550^{***}$}&{ $1.547^{***}$}&{ $1.682^{***}$}&{ $1.845^{***}$}\\
&{ $5.819$}&{ $5.877$}&{ $5.963^{***}$}&{ $6.406^{***}$}&{ $6.053^{*}$}&{ $6.853^{***}$}&{ $8.520^{***}$}\\ & \\
\multirow{2}{4em}{IBM}
&{ $2.331$}&{ $2.363^{***}$}&{ $2.364^{***}$}&{ $2.436^{***}$}&{ $2.483^{***}$}&{ $2.701^{***}$}&{ $2.986^{***}$}\\
&{ $11.768$}&{ $12.015^{***}$}&{ $12.036^{***}$}&{ $12.872^{***}$}&{ $12.887^{***}$}&{ $14.689^{***}$}&{ $19.371^{***}$}\\ & \\
\hline
\hline
\end{tabular}}
\end{center}
\caption{\footnotesize{Average of the loss functions (for each model the first line is MAE and the second is MSE) calculated with a moving window of ten days and a frequency of one minute. The symbols ***,**,* mean that the mSHARP delivers more accurate forecasts according to the \cite{diebold1995comparing} test (calculated with HAC standard errors) at 0.1\%, 1\%, 10\% significance level, respectively. Equal (in value) but statistically different loss functions are solely due to the limited number of digits shown.}}\label{table:forecasting_1_min}
\end{table}
\begin{table}[]
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccccccc}
\multicolumn{8}{c}{\textbf{Out-of-sample accuracy (five seconds).}}\\
\hline
\multicolumn{8}{c}{ }\\
&mSHARP &SHARP&olsSHARP &LMACP &sACP &seasonal &RW \\
\cline{2-8}\\
\multirow{2}{4em}{BAC}
&{ $0.015$}&{ $0.016^{***}$}&{ $0.015$}&{ $0.018^{***}$}&{ $0.016^{***}$}&{ $0.015$}&{ $0.020^{***}$}\\
&{ $0.015$}&{ $0.016^{***}$}&{ $0.015$}&{ $0.019^{**}$}&{ $0.016^{***}$}&{ $0.015$}&{ $0.020^{***}$}\\ & \\
\multirow{2}{4em}{VZ}
&{ $0.093$}&{ $0.093^{*}$}&{ $0.094^{***}$}&{ $0.127^{***}$}&{ $0.094^{***}$}&{ $0.100^{***}$}&{ $0.115^{***}$}\\
&{ $0.099$}&{ $0.099^{*}$}&{ $0.100^{**}$}&{ $0.184^{***}$}&{ $0.099$}&{ $0.110^{***}$}&{ $0.123^{***}$}\\ & \\
\multirow{2}{4em}{GM}
&{ $0.122$}&{ $0.123^{***}$}&{ $0.123^{***}$}&{ $0.171^{***}$}&{ $0.124^{***}$}&{ $0.130^{***}$}&{ $0.150^{***}$}\\
&{ $0.128$}&{ $0.129^{***}$}&{ $0.129^{***}$}&{ $0.220^{***}$}&{ $0.128^{*}$}&{ $0.139^{***}$}&{ $0.156^{***}$}\\ & \\
\multirow{2}{4em}{DAL}
&{ $0.235$}&{ $0.237^{***}$}&{ $0.237^{***}$}&{ $0.272^{***}$}&{ $0.238^{***}$}&{ $0.261^{***}$}&{ $0.263^{***}$}\\
&{ $0.251$}&{ $0.254^{***}$}&{ $0.254^{***}$}&{ $0.327^{***}$}&{ $0.254^{***}$}&{ $0.289^{***}$}&{ $0.288^{***}$}\\ & \\
\multirow{2}{4em}{HAL}
&{ $0.301$}&{ $0.302^{***}$}&{ $0.304^{***}$}&{ $0.330^{***}$}&{ $0.307^{***}$}&{ $0.378^{***}$}&{ $0.325^{***}$}\\
&{ $0.350$}&{ $0.353^{**}$}&{ $0.357^{***}$}&{ $0.412^{***}$}&{ $0.356^{***}$}&{ $0.467^{***}$}&{ $0.391^{***}$}\\ & \\
\multirow{2}{4em}{XOM}
&{ $0.413$}&{ $0.416^{***}$}&{ $0.421^{***}$}&{ $0.452^{***}$}&{ $0.421^{***}$}&{ $0.547^{***}$}&{ $0.452^{***}$}\\
&{ $0.604$}&{ $0.614^{***}$}&{ $0.624^{***}$}&{ $0.818^{***}$}&{ $0.618^{***}$}&{ $0.908^{***}$}&{ $0.746^{***}$}\\ & \\
\multirow{2}{4em}{VLO}
&{ $0.594$}&{ $0.597^{***}$}&{ $0.609^{***}$}&{ $0.601^{***}$}&{ $0.603^{***}$}&{ $0.804^{***}$}&{ $0.613^{***}$}\\
&{ $0.946$}&{ $0.953^{***}$}&{ $0.964^{***}$}&{ $1.049^{***}$}&{ $0.973^{***}$}&{ $1.441^{***}$}&{ $1.184^{***}$}\\ & \\
\multirow{2}{4em}{CVX}
&{ $0.666$}&{ $0.670^{***}$}&{ $0.679^{***}$}&{ $0.673^{***}$}&{ $0.678^{***}$}&{ $0.846^{***}$}&{ $0.717^{***}$}\\
&{ $1.249$}&{ $1.266^{***}$}&{ $1.290^{***}$}&{ $1.361^{***}$}&{ $1.284^{***}$}&{ $1.869^{***}$}&{ $1.626^{***}$}\\ & \\
\multirow{2}{4em}{APC}
&{ $1.149$}&{ $1.151^{***}$}&{ $1.163^{***}$}&{ $1.134$}&{ $1.159^{***}$}&{ $1.721^{***}$}&{ $1.112$}\\
&{ $3.198$}&{ $3.208^{***}$}&{ $3.226^{***}$}&{ $3.267^{***}$}&{ $3.287^{***}$}&{ $6.287^{***}$}&{ $3.936^{***}$}\\ & \\
\multirow{2}{4em}{IBM}
&{ $1.896$}&{ $1.899^{***}$}&{ $1.906^{***}$}&{ $1.893$}&{ $1.921^{***}$}&{ $2.706^{***}$}&{ $1.887$}\\
&{ $8.168$}&{ $8.192^{***}$}&{ $8.204^{***}$}&{ $8.199$}&{ $8.369^{***}$}&{ $14.504^{***}$}&{ $10.677^{***}$}\\ & \\
\hline
\hline
\end{tabular}
}
\end{center}
\caption{\footnotesize{Average of the loss functions (for each model the first line is MAE and the second is MSE)
calculated with a rolling window of five days and a frequency of five seconds.
The symbols ***,**,* mean that the mSHARP delivers more accurate forecasts according to the \cite{diebold1995comparing} test (calculated with HAC standard errors) at 0.1\%, 1\%, 10\% significance level, respectively. Equal (in value) but statistically different loss functions are solely due to the limited number of digits shown. }}\label{table:forecasting_5_sec}
\end{table}
\clearpage
Aside from being accurate, a good forecasting model should also be well-specified. Hence, to complete the picture of the empirical
comparative analysis, we report
in Table \ref{table:LB_1min} and in Table \ref{table:LB_5sec}, respectively for the one-minute and five-second
frequencies, the averages of the p-values of the Ljung-Box
statistics for the Pearson's residuals
\be\label{eq:pearsons}
\eta_t=\frac{S_t- \mathbb{E}[S_t\,|\,\mathcal{F}_{t-1}]}{ \sqrt{\text{Var}[S_t\,|\,\mathcal{F}_{t-1}]}},
\ee
where $S_t$, as usual, denotes the
observed spread, while $\mathbb{E}[S_t\,|\,\mathcal{F}_{t-1}]$ and $\text{Var}[S_t\,|\,\mathcal{F}_{t-1}]$ are,
respectively, its conditional mean and variance under the assumed model.
A p-value of the Ljung-Box statistics below $1\%$ is typically interpreted as a
misspecification of the model,
more precisely the null of uncorrelated residuals is rejected with
$99\%$ confidence. On the contrary, when the model is well-specified,
the $\eta$'s in \eqref{eq:pearsons} are a series of random variables with zero mean, unit variance and no autocorrelation.
The p-values in Table \ref{table:LB_1min} reveal that, at one-minute frequency,
the mSHARP, the SHARP and the olsSHARP are well-specified at any lags and for any kind of stock, from
the large (BAC) to the small (IBM) tick class, even though the p-values are quite close to a rejection
for small tick stocks and for large lags.
LMACP and sACP perform worse, especially for small tick stocks.
This can be ascribed to the fact that, at one-minute frequency, the persistence of the time series is not
particularly pronounced, and so the long-memory version of the ACP does not depart much
from the short-memory one in terms of goodness of specification. This distortion is mitigated
for large tick stocks where a good estimate of the seasonality pattern
is enough to not reject the null of uncorrelated residuals.
Nevertheless, as witnessed by the p-values in Table \ref{table:LB_5sec}, at the frequency of five seconds
we draw a quite different picture: the LMACP is the best
choice in terms of model specification, especially for small tick stocks. This is mainly
a consequence of the fact that, at such a high frequency, the persistence of the time series is relevant,
in particular for small tick stocks. The LMACP, being a purely long-memory process,
is designed to capture such a feature, producing therefore well-specified forecasts.
Not surprisingly, the purely seasonal model and the random walk are dramatically misspecified at
any frequency and for any kind of stock.
%%%
\begin{table}[!ht]
\footnotesize{
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccccccc}
\multicolumn{8}{c}{\textbf{Ljung-Box test statistics (one minute)}.}\\
\hline
\multicolumn{8}{c}{ }\\
&mSHARP&SHARP&olsSHARP &LMACP &sACP &seasonal &RW \\
%& & \\
\cline{2-8}\\
%& & \\
\multirow{3}{4em}{BAC}
&{ $0.743$}&{ $0.555$}&{ $0.961$}&{ $0.658$}&{ $0.394$}&{ $0.226$}&{ $0.000$}\\
&{ $0.717$}&{ $0.780$}&{ $0.217$}&{ $0.533$}&{ $0.406$}&{ $0.072$}&{ $0.000$}\\
&{ $0.323$}&{ $0.298$}&{ $0.063$}&{ $0.499$}&{ $0.258$}&{ $0.072$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{VZ}
&{ $0.697$}&{ $0.577$}&{ $0.873$}&{ $0.187$}&{ $0.355$}&{ $0.006$}&{ $0.000$}\\
&{ $0.379$}&{ $0.449$}&{ $0.333$}&{ $0.037$}&{ $0.380$}&{ $0.001$}&{ $0.000$}\\
&{ $0.152$}&{ $0.145$}&{ $0.086$}&{ $0.041$}&{ $0.104$}&{ $0.003$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{GM}
&{ $0.823$}&{ $0.771$}&{ $0.906$}&{ $0.170$}&{ $0.388$}&{ $0.007$}&{ $0.000$}\\
&{ $0.423$}&{ $0.501$}&{ $0.364$}&{ $0.023$}&{ $0.462$}&{ $0.003$}&{ $0.000$}\\
&{ $0.142$}&{ $0.152$}&{ $0.077$}&{ $0.015$}&{ $0.120$}&{ $0.005$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{DAL}
&{ $0.738$}&{ $0.845$}&{ $0.952$}&{ $0.048$}&{ $0.511$}&{ $0.001$}&{ $0.000$}\\
&{ $0.218$}&{ $0.323$}&{ $0.307$}&{ $0.004$}&{ $0.384$}&{ $0.000$}&{ $0.000$}\\
&{ $0.138$}&{ $0.121$}&{ $0.105$}&{ $0.000$}&{ $0.073$}&{ $0.001$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{HAL}
&{ $0.618$}&{ $0.675$}&{ $0.869$}&{ $0.099$}&{ $0.337$}&{ $0.000$}&{ $0.000$}\\
&{ $0.115$}&{ $0.121$}&{ $0.166$}&{ $0.049$}&{ $0.100$}&{ $0.000$}&{ $0.000$}\\
&{ $0.089$}&{ $0.071$}&{ $0.106$}&{ $0.010$}&{ $0.011$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{XOM}
&{ $0.634$}&{ $0.721$}&{ $0.862$}&{ $0.083$}&{ $0.297$}&{ $0.000$}&{ $0.000$}\\
&{ $0.092$}&{ $0.096$}&{ $0.171$}&{ $0.025$}&{ $0.034$}&{ $0.000$}&{ $0.000$}\\
&{ $0.057$}&{ $0.036$}&{ $0.041$}&{ $0.002$}&{ $0.001$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{VLO}
&{ $0.637$}&{ $0.655$}&{ $0.814$}&{ $0.125$}&{ $0.211$}&{ $0.000$}&{ $0.000$}\\
&{ $0.057$}&{ $0.054$}&{ $0.090$}&{ $0.027$}&{ $0.040$}&{ $0.000$}&{ $0.000$}\\
&{ $0.060$}&{ $0.023$}&{ $0.080$}&{ $0.013$}&{ $0.003$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{CVX}
&{ $0.487$}&{ $0.580$}&{ $0.758$}&{ $0.113$}&{ $0.155$}&{ $0.000$}&{ $0.000$}\\
&{ $0.027$}&{ $0.033$}&{ $0.088$}&{ $0.028$}&{ $0.003$}&{ $0.000$}&{ $0.000$}\\
&{ $0.041$}&{ $0.026$}&{ $0.045$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{APC}
&{ $0.561$}&{ $0.625$}&{ $0.699$}&{ $0.016$}&{ $0.088$}&{ $0.000$}&{ $0.000$}\\
&{ $0.030$}&{ $0.041$}&{ $0.080$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\
&{ $0.022$}&{ $0.019$}&{ $0.015$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{IBM}
&{ $0.711$}&{ $0.732$}&{ $0.719$}&{ $0.114$}&{ $0.056$}&{ $0.000$}&{ $0.000$}\\
&{ $0.048$}&{ $0.064$}&{ $0.063$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\
&{ $0.058$}&{ $0.059$}&{ $0.028$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\hline
\end{tabular}
}
\end{center}
\caption{\footnotesize{In this table we report the mean of the Ljung-Box test statistics for the Pearson residuals of each model calculated with $1$ (first line), $10$ (second line) and $390$ (third line) lags and with a moving window of ten days at a frequency of one minute. The
average is calculated over all the iterations of the moving window.}}\label{table:LB_1min}
}
\end{table}
%%%
\begin{table}[ht!]
\footnotesize{
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lccccccc}
\multicolumn{8}{c}{\textbf{Ljung-Box test statistics (five seconds)}.}\\
\hline
\multicolumn{8}{c}{}\\
&mSHARP &SHARP&olsSHARP &LMACP &sACP &seasonal &RW \\
%& & \\
\cline{2-8}\\
%& & \\
\multirow{3}{4em}{BAC}
&{ $0.064$}&{ $0.021$}&{ $0.196$}&{ $0.007$}&{ $0.104$}&{ $0.000$}&{ $0.000$}\\
&{ $0.157$}&{ $0.088$}&{ $0.006$}&{ $0.019$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{VZ}
&{ $0.156$}&{ $0.059$}&{ $0.182$}&{ $0.002$}&{ $0.203$}&{ $0.000$}&{ $0.000$}\\
&{ $0.010$}&{ $0.007$}&{ $0.001$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{GM}
&{ $0.340$}&{ $0.158$}&{ $0.217$}&{ $0.014$}&{ $0.124$}&{ $0.000$}&{ $0.000$}\\
&{ $0.001$}&{ $0.001$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{DAL}
&{ $0.512$}&{ $0.265$}&{ $0.240$}&{ $0.012$}&{ $0.037$}&{ $0.000$}&{ $0.000$}\\
&{ $0.001$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{HAL}
&{ $0.482$}&{ $0.267$}&{ $0.225$}&{ $0.032$}&{ $0.147$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{XOM}
&{ $0.373$}&{ $0.164$}&{ $0.217$}&{ $0.017$}&{ $0.067$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{VLO}
&{ $0.115$}&{ $0.063$}&{ $0.056$}&{ $0.042$}&{ $0.033$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{CVX}
&{ $0.261$}&{ $0.119$}&{ $0.137$}&{ $0.046$}&{ $0.093$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.007$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{APC}
&{ $0.040$}&{ $0.025$}&{ $0.002$}&{ $0.192$}&{ $0.195$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.079$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\multirow{3}{4em}{IBM}
&{ $0.027$}&{ $0.015$}&{ $0.002$}&{ $0.199$}&{ $0.202$}&{ $0.000$}&{ $0.000$}\\
&{ $0.000$}&{ $0.000$}&{ $0.000$}&{ $0.141$}&{ $0.000$}&{ $0.000$}&{ $0.000$}\\ & \\
\hline
\end{tabular}
}
\end{center}
\caption{\footnotesize{In this table we report the mean of the Ljung-Box test statistics for the Pearson residuals of each model calculated
with $1$ (first line) and $10$ lags (second line), with a moving window of five days and at a frequency of five seconds. The
average is calculated over all the iterations of the moving window.
The results with the maximum number of lags, i.e. $4\,680$, are not shown
because they are always smaller than $10^{-3}$ except for the case of the BAC with the LMACP model, for which it is equal to $0.151$.}}\label{table:LB_5sec}
}
\end{table}
%%%
Finally consider that, in empirical applications,
the model that requires, ceteris paribus, the smallest amount of time to complete the forecast is the preferred one. This is especially
true whenever the predictions are used to determine the decisions of an
optimal execution strategy and, consequently, a rapid update of the model parameters is needed.
Hence, the time required to estimate
the model is a feature that influences the global evaluation of its
performances. For this reason, the average machine time needed to estimate the seven models is shown in Table \ref{table:est_times}.
They are in line with what is expected
from the analytical properties of each model. In fact,
the estimation of long memory processes is known to be quite cumbersome.
On the other hand, the analytical properties of the
SHARP (and of the mSHARP/olsSHARP), that is the concavity of the log-likelihood and the simplicity of the intraday pattern estimator,
translate into a very rapid optimization of the likelihood. In particular, since the olsSHARP
is estimated through a closed-form expression, its estimation time is almost negligible.
%%%
\begin{table}[!ht]
\begin{center}
\begin{tabular}{l c c c c c c c}
\multicolumn{8}{c}{\textbf{Average machine time}}\\
\hline
& & & & & & & \\
&mSHARP&SHARP&olsSHARP &LMACP&sACP & Seasonal & RW\\[0.1cm]
\cline{2-8}\\[0.1cm]
1 min. &0.082 &0.097&0.002&85.36 &0.081 & - & - \\[0.2cm]
5 sec. &0.224&0.182&0.005&284.5 &0.282 & -& - \\
& & & & & & & \\
\hline
\end{tabular}
\end{center}
\caption{\footnotesize{This table reports the average time (in seconds, averaged across all ten stocks and across all the estimation windows) required to estimate each model. For the frequency of one minute the seven models are estimated in a time window of ten days, while for the frequency of five seconds they are estimated in a time window of five days. Optimization is achieved through the Sequential Quadratic Programming (SQP) implemented in Matlab. We used an Intel Core i5-2450M CPU, 2.50GHz with 4 processors (including cores). The estimation times for the Seasonal and the Random Walk models are not reported since they are close to zero.}}\label{table:est_times}
\end{table}
\subsection*{An application to optimal execution}
\nota{Qui R1 e AE ci chiede di aggiungere una comparison con sACP. Potrebbe essere complicata!}
In this section we show that, under the assumption of
martingality for the mid-quote
process, it is possible to
use the predictions of our
model to develop a trading schedule which
reduces
the transaction costs
%of a given
%Moreover,
%we discuss
%how the predictions produced by the SHARP model can be used to
%reduce transaction costs
with respect to trading strategies that
do not profit from any forecasting model for the bid-ask spread.
Since high-frequency trading is nowadays widespread, we
design an optimal execution strategy that profits from
the bid-ask spread forecasts produced by the SHARP model on an equi-spaced time grid of five seconds.
Hence, in what follows, the time index $t$
is assumed to belong to the partition $\mathcal{T}$ in \eqref{eq:tinset} with $J=4\,680$.
As anticipated, we work in
the assumption that the mid price process\footnote{Defined as the average price
between the best ask and the best bid, that is $M_t=(\ASK_t+\BID_t)/2$.} $M_t$ is a martingale, that is,
$$
\mathbb{E}[M_{t+h}\,|\,\mathcal{F}_t]=M_t,\quad \forall h\in\mathbb{N}_+.
$$
This assumption implies that the $h$-step ahead forecasts at time $t$ of the ask ($\ASK_{t+h}$) and bid ($\BID_{t+h}$) prices
depend on the forecast of the spread $S_{t+h}$ as follows
\begin{equation}\label{eq:ask_bid}
\begin{array}{l}
\mathbb{E}[\ASK_{t+h}\,|\,\mathcal{F}_t]=\mathbb{E}[M_{t+h}\,+1/2(1+S_{t+h})|\,\mathcal{F}_t] = M_t+1/2 + 1/2\, \mathbb{E}[S_{t+h}|\,\mathcal{F}_t],\\[0.2cm]
\mathbb{E}[\BID_{t+h}\,|\,\mathcal{F}_t]=\mathbb{E}[M_{t+h}\,-1/2(1+S_{t+h})|\,\mathcal{F}_t] = M_t-1/2 - 1/2\, \mathbb{E}[S_{t+h}|\,\mathcal{F}_t].
\end{array}
\end{equation}
In particular, the smaller the $\mathbb{E}[S_{t+h}|\,\mathcal{F}_t]$
the smaller the $\mathbb{E}[\ASK_{t+h}\,|\,\mathcal{F}_t]$ and
the higher the $\mathbb{E}[\BID_{t+h}\,|\,\mathcal{F}_t]$.
For this reason, accurate spread predictions allow the selection of the instants characterized by (statistically) smaller ask prices and (statistically) higher bid prices.
In the empirical exercise described here,
we largely follow the approach of \cite{taylor2002economic} and \cite{gross2013predicting}.
Orders are assumed to be split into smaller trades and distributed over the day, a common
practice typically used to
reduce the price impact of transactions
\citep[see, for example,][]{almgren2001optimal}.
More specifically, each trading day $d$,
with $d=1,...,D$,
is divided into $J/K$ trading
intervals of ten minutes, with $K=120$.
Having also set $J=4\,680$,
this means that the entire trading
day of $6.5$ hours is divided into $39$
intervals of $10$ minutes\footnote{Results with different choices of $K$ are available upon request.}.
Within each of these intervals $i$, with $i=1,...,J/K$,
the trader has to decide which is the optimal instant to trade.
We compare three trading strategies for this decision.
In the first, which we refer to as the \enquote{uninformed strategy},
we consider the choice of a trader that has no information on the past
history of the bid-ask spread. Without any clue on the future, a rational choice for this trader
is to schedule the trading assigning equal probability to all the time instants.
More precisely, the trader places a
buy (resp. sell) market order at the best
available ask (resp. bid) price $\ASK^{\tonde{\textrm{U}}}_{i,d}$ (resp. $\BID^{\tonde{\textrm{U}}}_{i,d}$)
prevailing at a time instant randomly and uniformly extracted among the $K$ available, in formula
$$
\begin{array}{lll}
\ASK^{\tonde{\textrm{U}}}_{i,d} & = & \ASK_{(d-1) J +t_i^{\tonde{\textrm{U}}}},\\[0.1cm]
\BID^{\tonde{\textrm{U}}}_{i,d} & = & \BID_{(d-1) J +t_i^{\tonde{\textrm{U}}}},
\end{array}
$$
where $t_i^{\tonde{\textrm{U}}}$ are iid integer random variables uniformly distributed in $\graffe{\tonde{i-1}\,K+1,...,i\,K}$.
In the second strategy, we consider the optimal scheduling of a trader
that profits solely from the estimated seasonal pattern of the spread. To be optimal and minimize the costs, the trader has to place the market order in the instant defined by the smallest value of the estimated seasonal pattern within each interval $i$, facing
ask and bid price given, respectively, by
\be\label{eq:seasstrat}
\begin{array}{lll}
\ASK^{(\varphi)}_{i,d} &= & \ASK_{(d-1) J + t_{i}^{(\varphi)}},\\[0.1cm]
\BID^{(\varphi)}_{i,d} &= & \BID_{(d-1) J + t_{i}^{(\varphi)}},
\end{array}
\ee
where $t_{i}^{(\varphi)}$ is defined as
$$
t_{i}^{(\varphi)}= \argmin_{ (i-1)K+1\leq j \leq i K}\widehat{\varphi}_{j},
$$
and where the estimate $\widehat{\varphi}_{j}$ is computed, via the estimator \eqref{eq:phi_hat}, using the
five days prior to the day $d$ considered. We name this strategy the \enquote{seasonal strategy}.
In our third approach the trader
takes advantage of the bid-ask spread forecasts based on the SHARP model.
More precisely, at each instant in the $i$-th time interval of the day, the trader generates
predictions of spreads in the remaining instants of the interval, following the procedure\footnote{In particular the
procedure adopted to generate forecasts on an equi-spaced time grid of five seconds.} described in Section
\ref{sec:Empirical}.
Using these forecasts, the trader decides to trade as soon as the
realized spread is lower than all the forecasts
within the remaining instants of the interval. We name this strategy the \enquote{SHARP-based strategy}.
Following this scheduling the trader faces ask and bid prices given, respectively, by
$$
\begin{array}{lll}
\ASK^{\tonde{\textrm{S}}}_{i,d} & = & \ASK_{(d-1)\,J+t_{i}^{\tonde{\textrm{S}}}}\\
\BID^{\tonde{\textrm{S}}}_{i,d} & = & \BID_{(d-1)\,J+t_{i}^{\tonde{\textrm{S}}}}
\end{array}
$$
where $t_{i}^{\tonde{\textrm{S}}}$ is the first time instant in the set $\graffe{(i-1)\,K+1,...,i\,K}$ such that\footnote{This trading algorithm requires the computation of the $z$-step-ahead forecasts, which can be obtained directly from equation \eqref{seasonal_model} adopting a standard iterated multi-period ahead approach \citep[see, for example,][]{marcellino2006comparison}
$$
\mathbb{E} \left[ S_{t+z} \, | \, \mathcal{F}_{t} \right] =\widehat{\varphi}_{j_{t+z}} \bigg( \left(1-\Sigma_\alpha \right) +\widehat{\alpha}^{(s)} \frac{ \mathbb{E} \left[ S_{t+z-1}\,|\,\mathcal{F}_{t} \right] }{\widehat{\varphi}_{j_{t+z-1}}} + \frac{\widehat{\alpha}^{(m)}}{m} \sum_{k=1}^{m} \frac{ \mathbb{E} \left[ S_{t+z-k}\,|\,\mathcal{F}_{t} \right]}{\widehat{\varphi}_{j_{t+z-k}}}+ \frac{\widehat{\alpha}^{(\ell)} }{\ell} \sum_{k=1}^{\ell} \frac{\mathbb{E} \left[ S_{t+z-k}\,|\,\mathcal{F}_{t} \right]}{\widehat{\varphi}_{j_{t+z-k}}} \bigg),\nonumber
$$
for $z\geq 1$ and where $\widehat{\varphi}_{j_t}$'s, $\widehat{\alpha}^{(s)}$, $\widehat{\alpha}^{(m)}$ and $\widehat{\alpha}^{(\ell)}$ are the estimated parameters of the SHARP model for the day considered.}
$$
\E{S_{(d-1)\,J+t_{i}^{\tonde{\textrm{S}}}+z}\,\cond\,\mathcal{F}_{(d-1)\,J+t_{i}^{\tonde{\textrm{S}}}}}>S_{(d-1)\,J+t_{i}^{\tonde{\textrm{S}}}},\quad \forall z\in\mathbb{N}_+,~z \leq i K-t_{i}^{\tonde{\textrm{S}}}.
$$
For any given interval $i$ of day $d$, the SHARP-based strategy is preferred to, say,
the uninformed one if $\ASK_{i,d}^{\tonde{\textrm{U}}}-\ASK_{i,d}^{\tonde{\textrm{S}}}>0$ or if $\BID_{i,d}^{\tonde{\textrm{U}}}-\BID_{i,d}^{\tonde{\textrm{S}}}<0$. More generally,
the gains, with respect to the uninformed strategy,
derived by following the SHARP-based trading schedule, can be defined as
\begin{equation} \label{eq:ask_bid_gain}
\begin{split}
\textrm{G}_{\ASK}^{\tonde{\textrm{U}}}&= \frac{1}{D} \sum_{d=1}^D \left( \frac{1}{J/K} \sum_{i=1}^{J/K} \frac{\ASK^{\tonde{\textrm{U}}}_{i,d} - \ASK^{\tonde{\textrm{S}}}_{i,d}}{\widebar{S}_{i,d}}\right),\\[5pt]
\textrm{G}_{\BID}^{\tonde{\textrm{U}}}&= \frac{1}{D} \sum_{d=1}^D \left( \frac{1}{J/K} \sum_{i=1}^{J/K} \frac{\BID^{\tonde{\textrm{S}}}_{i,d} - \BID^{\tonde{\textrm{U}}}_{i,d}}{\widebar{S}_{i,d}}\right),
\end{split}
\end{equation}
where $\widebar{S}_{i,d}=1/K\sum_{k=K (i-1)+1}^{K i} (S_{J (d-1) +k}+1)$
is the average spread in the $i$-th interval of day $d$. Of course, an identical
definition holds for the gain with respect to the seasonal strategy defined by the equations \eqref{eq:seasstrat}. The averages
in \eqref{eq:ask_bid_gain} must be interpreted as the average gains (in units of the average spread of the considered intervals) of adopting, for trade scheduling, SHARP-based spread predictions instead of the uninformed strategy. For example, a $\textrm{G}_{\ASK}^{\tonde{\textrm{U}}}$ of $0.1$ corresponds to a saving of one tenth
of the average spread costs in buying according to the SHARP-based strategy.
Table \ref{table:price_forecast} shows that the SHARP-based strategy brings
significant gains mostly for medium and small tick stocks, more precisely for XOM, VLO, CVX, APC and IBM, that is for stocks from the sixth to the tenth
spread quantile. This result confirms that the spread dynamics over the seasonal pattern for large tick stocks is quite trivial
and a purely seasonal model could do a good job in predicting future values of the spread and, accordingly,
in reducing the execution costs of a trading strategy. Conversely, for medium and small tick stocks the adoption
of the SHARP model may save up to $20\%$ (resp. $16\%$) of the average spread costs in selling (resp. buying) the asset with respect to the seasonal strategy.
\begin{table}[ht!]
\begin{center}
\resizebox{\textwidth}{!}{%
\begin{tabular}{lcccccccccc}
\multicolumn{11}{c}{\textbf{Average gain of the SHARP-based trading schedule.}}\\
\hline\\[0.1cm]
&BAC&VZ &GM&DAL &HAL &XOM &VLO &CVX &APC &IBM \\[0.1cm]
%& & \\
\cline{2-11}\\
$\textrm{G}_{\ASK}^{\tonde{\textrm{U}}}$&-0.006&0.011&0.006&0.042&0.178**&0.186**&0.091**&0.156**&0.179**&0.143** \\[0.1cm]
$\textrm{G}_{\BID}^{\tonde{\textrm{U}}}$&0.017&0.073*&0.091**&0.149**&0.085&0.116**&0.226**&0.158**&0.104**&0.161** \\[0.1cm]
$\textrm{G}_{\ASK}^{(\varphi)}$&0.006&-0.014&-0.017&0.108*&0.104&0.160**&0.109**&0.161**&0.147**&0.143** \\[0.1cm]
$\textrm{G}_{\BID}^{(\varphi)}$&0.008*&0.098**&0.109**&0.081&0.152**&0.124**&0.209**&0.152**&0.127**&0.151** \\
& & & & & & \\
\hline
\end{tabular}}
\end{center}
\caption{\footnotesize{The rows $\textrm{G}_{\ASK}^{\tonde{\textrm{U}}}$ and $\textrm{G}_{\BID}^{\tonde{\textrm{U}}}$ and the rows $\textrm{G}_{\ASK}^{(\varphi)}$ and $\textrm{G}_{\BID}^{(\varphi)}$ report, respectively, the average gains (defined in equation \eqref{eq:ask_bid_gain}) in adopting a trading strategy based on SHARP forecasts with respect to the uninformed and seasonal trading strategies.
The gain $\textrm{G}_{\ASK}$ (resp. $\textrm{G}_{\BID}$) is positive when the average ask (resp. bid) price paid with the SHARP-based strategy is smaller (resp. higher) than that paid with the other schedule. We test the significance of $\textrm{G}_{\ASK}$ and $\textrm{G}_{\BID}$ conducting a t-test for the null hypothesis of zero mean. The symbols **, * mean that the test is significant at 5\%, 10\% level, respectively.}}\label{table:price_forecast}
\end{table}
\section*{Conclusions}\label{sec:conclusions}
This study proposes a parsimonious and fast-to-estimate
discrete-time forecasting model for integer-valued time series. The model features
seasonality and pseudo long-memory patterns.
The dependent variable is assumed, at each time instant,
to follow a Poisson distribution whose
conditional intensity is driven by a seasonal intraday
pattern and by three autoregressive components that produce a slowly decaying
autocorrelation function, as in the HAR model by \cite{corsi2009simple}. We name the model SHARP as
an acronym for Seasonal Heterogeneous Auto-Regressive Poisson.
Under very mild assumptions, we prove
asymptotic consistency of a non-parametric estimator of the seasonal intraday pattern.
Although the model is formulated for a generic integer-valued random variable
featuring seasonality and long-memory,
in our empirical application we put the focus on
bid-ask spreads of financial equity stocks, which play a
prominent role in optimal execution.
We prove that the SHARP model can be estimated either by maximizing a
concave log-likelihood function, a feature that guarantees a fast
and reliable numerical procedure,
or, analytically, via ordinary least squares (in this case we refer to it as olsSHARP).
Following the spirit of the mixed-data sampling approach by \cite{ghysels2007midas},
we propose an extension of the SHARP model, named MIDAS-SHARP (or mSHARP), which
exploits more efficiently the information flow generated by the historical time series.
On a dataset of ten representative NYSE stocks, selected in order to cover a wide range
of bid-ask spread dynamics, both the SHARP (independently of the estimation procedure adopted) and the mSHARP show,
in spite of their simple formulation, remarkably good forecasting performances, which we assess along three dimensions:
accuracy, misspecification and machine time needed for estimation.
Concerning the first, although the mSHARP does not formally generate long-memory
patterns, we prove that it is more accurate, both at moderate (one minute) and at high (five seconds)
sampling frequencies, than the
benchmark LMACP, which is a genuine long-memory model.
In terms of misspecification, the SHARP, olsSHARP and
mSHARP are well-specified both for large and small tick stocks at the frequency of one minute,
while the LMACP performs better at the higher frequency of five seconds, where persistency
becomes more relevant. Finally, the three
model specifications SHARP, olsSHARP and mSHARP
are estimated in negligible times.
The results of the comparison confirm that it is possible
to have competitive and reliable forecasts
of the bid-ask spread
without necessarily resorting to long-memory
processes, whose estimation procedure is often quite cumbersome and could be up to a thousand times
slower.
As an empirical application,
we show how bid-ask spread forecasts based on the SHARP model
provide insightful information for reducing the total costs of transacting.
We compare the gain of a trader
that schedules her trades according to the spread
forecasts provided by the SHARP,
with respect to other two benchmark strategies, an uninformed and a seasonal strategy in which,
respectively, the trader does not profit from any information from the past or profits solely from
the estimated seasonal pattern. The empirical comparison shows that SHARP-based scheduling
helps the traders in reducing the cost of transacting, especially for medium and small tick stocks.
\clearpage
{
\bibliographystyle{Chicago}
\bibliography{Omni}
}
\begin{appendices}
\section*{Proofs.}\label{app:LLN}
\paragraph{ Proof of Theorem 1}
From the last two lines of equation \eqref{seasonal_model}, we obtain that
\begin{equation}\label{eq:1}
\frac{\lambda_t}{\varphi_{j_t}} = \mu_t= (1-\Sigma_\alpha)+\alpha_H(B)\frac{S_t}{\varphi_{j_t}},
\end{equation}
with $\alpha_H(B)=\alpha^{\tonde{s}} B + \frac{\alpha^{\tonde{m}}}{m} \sum_{k=1}^{m} B^k + \frac{\alpha^{\tonde{\ell}}}{\ell} \sum_{k=1}^{\ell} B^k$. Now we define the martingale difference sequence $v_t$ as $v_t=S_t-\lambda_t$. Substituting $v_t$ into equation \eqref{eq:1}, we get that
\be\label{eq:to_be_inverted}
\tonde{1-\alpha_H\tonde{B}}\frac{S_t}{\varphi_{j_t}}=(1-\Sigma_\alpha)+\frac{v_t}{\varphi_{j_t}}.
\ee
The operator $(1-\alpha_H(B))$ that appears in equation \eqref{eq:to_be_inverted} is invertible if and only if its roots lie outside the unit circle, that is, if $\alpha^{(s)}+\alpha^{\tonde{m}}+\alpha^{\tonde{\ell}}<1$.
The invertibility of
$(1-\alpha_H(B))$ and equation \eqref{eq:to_be_inverted} imply that
\begin{equation}\label{eq:S}
S_t=\varphi_{j_t}+\varphi_{j_t} (1-\alpha_H(B))^{-1} \frac{v_t}{\varphi_{j_t}},
\end{equation}
so that, for a given $ h\in\graffe{1,...,J}$, consider the process
\begin{equation}\label{eq:Y_process}
Y^{\tonde{h}}_d = S_{h+d\,J}-\varphi_{h},~~d=0,1,...,D-1
\end{equation}
whose covariance at lag
$k$ is written as
\be\label{eq:cov_stat}
\begin{split}
\mathbb{E}\quadre{Y_{d}^{\tonde{h}}\,Y_{d+k}^{\tonde{h}}}
&=\mathbb{E}\left[\left(\varphi_{h} (1-\alpha_H(B))^{-1} \frac{v_{h+d\,J}}{\varphi_{h}}\right)\left(\varphi_{h} (1-\alpha_H(B))^{-1} \frac{v_{h+(d+k)\,J}}{\varphi_{h}}\right)\right]\\
&=\mathbb{E}\left[\left(\varphi_{h} \sum_{s=0}^{\infty} \delta_s B^s \frac{v_{h+d\,J}}{\varphi_{h}}\right)\left(\varphi_{h} \sum_{s^{\prime}=0}^{\infty} \delta_{s^{\prime}} B^{s^{\prime}} \frac{v_{h+(d+k)\,J}}{\varphi_{h}}\right)\right]\\
&=\mathbb{E}\left[\left(\varphi_{h} \sum_{s=0}^{\infty} \delta_s \frac{v_{h+d\,J-s}}{\varphi_{j_{h-s}}}\right)\left(\varphi_{h} \sum_{s^{\prime}=0}^{\infty} \delta_{s^{\prime}} \frac{v_{h+(d+k)\,J-s^{\prime}}}{\varphi_{j_{h-s^{\prime}}}}\right)\right]\\
&=\varphi_{h}^2 \sum_{s=0}^{\infty} \delta_s\, \delta_{s+k\,J} \, \mathbb{E}\left[ \frac{v_{h+d\,J-s}^2}{\varphi_{j_{h-s}}^2}\right]\\
&=\varphi_{h}^2 \sum_{s=0}^{\infty} \frac{\delta_s\, \delta_{s+k\,J}}{\varphi_{j_{h-s}}},
\end{split}
\ee
with $\delta_q$ such that $ \sum_{q=0}^{\infty}\delta_q\,B^q =\tonde{1-\alpha_H\tonde{B}}^{-1}$.
From equality \eqref{eq:cov_stat} we see that, for a given $h$, the process $\tonde{S_{h+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ is covariance stationary,
that is the coefficients
$$
\gamma_{k}^{(h)}\df \mathbb{E}[(S_{h+d\,J}-\varphi_{h})(S_{h+(d+k)\,J}-\varphi_{h})]=
\mathbb{E}[Y_{d}^{\tonde{h}}\,Y_{d+k}^{\tonde{h}}],
$$
depend only on the lag $k$, and not on $d$. Moreover, the process $\tonde{S_{h+d\,J}}_{d\in\graffe{0,1,...,D-1}}$ is not a real long memory process, therefore the autocorrelation is absolutely summable, whence
the consistency result \eqref{eq:consistency}
$$
\widehat{\varphi}_{h}\df\frac{1}{D}\sum_{d=0}^{D-1} S_{h+d\,J}\stackrel{p}{\to} \varphi_{h}\quad\textrm{as }D\to \infty,
$$
follows from Proposition 7.5 in \cite{hamilton1994time}.
\end{appendices}
\end{document}
\selectlanguage{english}
\FloatBarrier
\bibliographystyle{plainnat}
\bibliography{bibliography/converted_to_latex.bib%
}
\end{document}