这是本文档旧的修订版!
The exponential family of distributions over $\vec{x}$, given parameters $\vec{\eta}$, is defined to be the set of distributions of the form
\begin{eqnarray} \label{exponential_family} p(\vec{x} | \vec{\eta}) & = & h(\vec{x}) g(\vec{\eta}) \exp \left \{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right \} \end{eqnarray}
where $\vec{x}$ may be scalar or vector, and may be discrete or continuous. Here $\vec{\eta}$ are called the natural parameters of the distribution, and $h(\vec{x})$ and $\vec{u}(\vec{x})$ are, respectively, the base measure and the sufficient statistics. The function $g(\vec{\eta})$ can be interpreted as the coefficient that ensures that the distribution is normalized, and therefore satisfies
\begin{eqnarray} \label{normalization} g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} d \vec{x}} & = & 1 \end{eqnarray}
where the integration is replaced by summation if $\vec{x}$ is a discrete variable.
Taking the gradient of both sides of Eq. \ref{normalization} with respect to $\vec{\eta}$, we have
\begin{eqnarray} \label{first-derivative} \nabla g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} d \vec{x}} + g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} \vec{u}(\vec{x}) d \vec{x}} & = & 0 \end{eqnarray}
Rearranging, and making use of Eq. \ref{normalization} to replace the first integral by $1 / g(\vec{\eta})$, then gives
\begin{eqnarray} -\frac{1}{g(\vec{\eta})} \nabla g(\vec{\eta}) & = & g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} \vec{u}(\vec{x}) d \vec{x}} = \mathbb{E}[\vec{u}(\vec{x})] \end{eqnarray}
We therefore obtain the result
\begin{eqnarray} \label{expectation} - \nabla \ln g(\vec{\eta}) & = & \mathbb{E}[\vec{u}(\vec{x})] \end{eqnarray}
Taking the gradient of both sides of Eq. \ref{first-derivative} with respect to $\vec{\eta}$, we have
\begin{eqnarray} & & \nabla \nabla g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} d \vec{x}} + \nabla g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} \vec{u}(\vec{x})^{\mathrm{T}} d \vec{x}} \nonumber \\ & & {} + \left[ \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} \vec{u}(\vec{x}) d \vec{x}} \right] \left[ \nabla g(\vec{\eta}) \right]^{\mathrm{T}} + g(\vec{\eta}) \int {h(\vec{x}) \exp \left\{ \vec{\eta}^{\mathrm{T}} \vec{u}(\vec{x}) \right\} \vec{u}(\vec{x}) \vec{u}(\vec{x})^{\mathrm{T}} d \vec{x}} = 0 \label{second-derivative} \end{eqnarray}
Rearranging, and again making use of Eq. \ref{normalization}, then gives
\begin{eqnarray} \frac{1}{g(\vec{\eta})} \nabla \nabla g(\vec{\eta}) + \frac{1}{g(\vec{\eta})} \nabla g(\vec{\eta}) \mathbb{E}[\vec{u}(\vec{x})^{\mathrm{T}}] + \mathbb{E}[\vec{u}(\vec{x})] \left[ \frac{1}{g(\vec{\eta})} \nabla g(\vec{\eta}) \right]^{\mathrm{T}} + \mathbb{E}[\vec{u}(\vec{x}) \vec{u}(\vec{x})^{\mathrm{T}}] & = & 0 \end{eqnarray}
Making use of Eq. \ref{expectation} then gives
\begin{eqnarray} \label{covariance-middle} \frac{1}{g(\vec{\eta})} \nabla \nabla g(\vec{\eta}) - 2 \mathbb{E}[\vec{u}(\vec{x})] \mathbb{E}[\vec{u}(\vec{x})^{\mathrm{T}}] + \mathbb{E}[\vec{u}(\vec{x}) \vec{u}(\vec{x})^{\mathrm{T}}] & = & 0 \end{eqnarray}
Now consider the second derivative of $\ln g(\vec{\eta})$, where the last step makes use of Eq. \ref{expectation}:
\begin{eqnarray} \nabla \nabla \ln g(\vec{\eta}) & = & \nabla \frac{\nabla g(\vec{\eta})}{g(\vec{\eta})} \nonumber \\ & = & \frac{1}{g(\vec{\eta})} \nabla \nabla g(\vec{\eta}) - \left[ \frac{1}{g(\vec{\eta})} \nabla g(\vec{\eta}) \right] \left[ \frac{1}{g(\vec{\eta})} \nabla g(\vec{\eta}) \right]^{\mathrm{T}} \nonumber \\ & = & \frac{1}{g(\vec{\eta})} \nabla \nabla g(\vec{\eta}) - \mathbb{E}[\vec{u}(\vec{x})] \mathbb{E}[\vec{u}(\vec{x})^{\mathrm{T}}] \label{log-second-derivative} \end{eqnarray}
Rearranging Eq. \ref{log-second-derivative}, we obtain
\begin{eqnarray} \label{log-second} \frac{1}{g(\vec{\eta})} \nabla \nabla g(\vec{\eta}) & = & \nabla \nabla \ln g(\vec{\eta}) + \mathbb{E}[\vec{u}(\vec{x})] \mathbb{E}[\vec{u}(\vec{x})^{\mathrm{T}}] \end{eqnarray}
Inserting Eq. \ref{log-second} into Eq. \ref{covariance-middle}, we obtain
\begin{eqnarray} \label{covariance} - \nabla \nabla \ln g(\vec{\eta}) & = & \mathbb{E}[\vec{u}(\vec{x}) \vec{u}(\vec{x})^{\mathrm{T}}] - \mathbb{E}[\vec{u}(\vec{x})] \mathbb{E}[\vec{u}(\vec{x})^{\mathrm{T}}] \end{eqnarray}
评论