Proof of clustering loss in terms of centroids
First, note that
\begin{align}
\norm{\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}}}{}^2 &= \left(\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}} \right)^T \left(\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}} \right) \\\\
&= \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\vx_\nunlabeledsmall^T\vx_{\dash{\nunlabeledsmall}} + \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}}
\end{align}
Now consider the inner sum in \( W(\sC) \).
\begin{align}
\sum_{C(\dash{\nunlabeledsmall})=k} \norm{\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}}}{}^2 &= \sum_{C(\dash{\nunlabeledsmall})=k} \left[ \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\vx_\nunlabeledsmall^T\vx_{\dash{\nunlabeledsmall}} + \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}} \right] \\\\
&= \nunlabeled_k\vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\vx_\nunlabeledsmall^T \left[\sum_{C(\dash{\nunlabeledsmall})=k} \vx_{\dash{\nunlabeledsmall}}\right] + \sum_{C(\dash{\nunlabeledsmall})=k} \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}} \\\\
&= \nunlabeled_k\vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\nunlabeled_k\vx_\nunlabeledsmall^T\bar{\vx}_k + \sum_{C(\dash{\nunlabeledsmall})=k} \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}}
\end{align}
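Next, apply the middle sum, \( \sum_{C(\nunlabeledsmall)=k} \), to this result. The last term does not depend on \( \nunlabeledsmall \), so summing it over the \( \nunlabeled_k \) points of the cluster simply multiplies it by \( \nunlabeled_k \); and because \( \dash{\nunlabeledsmall} \) is only a dummy index ranging over the same cluster, that term equals \( \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall \) and merges with the first term.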
\begin{align}
\sum_{C(\nunlabeledsmall)=k} \sum_{C(\dash{\nunlabeledsmall})=k} \norm{\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}}}{}^2 &= \sum_{C(\nunlabeledsmall)=k} \left[\nunlabeled_k\vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\nunlabeled_k\vx_\nunlabeledsmall^T\bar{\vx}_k + \sum_{C(\dash{\nunlabeledsmall})=k} \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}} \right] \\\\
&= \nunlabeled_k \left[ \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall \right] - 2\nunlabeled_k\left[ \sum_{C(\nunlabeledsmall)=k}\vx_\nunlabeledsmall \right]^T\bar{\vx}_k + \nunlabeled_k \left[\sum_{C(\dash{\nunlabeledsmall})=k} \vx_{\dash{\nunlabeledsmall}}^T\vx_{\dash{\nunlabeledsmall}} \right] \\\\
&= 2\nunlabeled_k \left[ \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall \right] - 2\nunlabeled_k\left[\nunlabeled_k\bar{\vx}_k\right]^T\bar{\vx}_k \\\\
&= 2 \nunlabeled_k \left[\sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - \nunlabeled_k\bar{\vx}_k^T\bar{\vx}_k\right]
\end{align}
Now, note that
\begin{align}
\sum_{C(\nunlabeledsmall)=k} \norm{\vx_\nunlabeledsmall - \bar{\vx}_k}{}^2 &= \sum_{C(\nunlabeledsmall)=k} \left[ \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall- 2\vx_\nunlabeledsmall^T\bar{\vx}_k + \bar{\vx}_k^T \bar{\vx}_k\right] \\\\
&= \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\left[ \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall \right]^T \bar{\vx}_k + \nunlabeled_k \bar{\vx}_k^T \bar{\vx}_k \\\\
&= \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - 2\nunlabeled_k \bar{\vx}_k^T \bar{\vx}_k + \nunlabeled_k \bar{\vx}_k^T \bar{\vx}_k \\\\
&= \sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - \nunlabeled_k \bar{\vx}_k^T \bar{\vx}_k
\end{align}
Substituting this result in the previous expression, we get
\begin{align}
\sum_{C(\nunlabeledsmall)=k} \sum_{C(\dash{\nunlabeledsmall})=k} \norm{\vx_\nunlabeledsmall - \vx_{\dash{\nunlabeledsmall}}}{}^2 &=2 \nunlabeled_k \left[\sum_{C(\nunlabeledsmall)=k} \vx_\nunlabeledsmall^T\vx_\nunlabeledsmall - \nunlabeled_k\bar{\vx}_k^T\bar{\vx}_k\right] \\\\
&= 2 \nunlabeled_k \sum_{C(\nunlabeledsmall)=k} \norm{\vx_\nunlabeledsmall - \bar{\vx}_k}{}^2
\end{align}
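Thus, summing over the clusters \( k \) and cancelling the factor of 2 against the factor of \( \frac{1}{2} \) in the definition of \( W(\sC) \), the within-point scatter loss function can be written as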
\begin{equation}
W(\sC) = \sum_{k=1}^K \nunlabeled_k \sum_{C(\nunlabeledsmall)=k} \norm{\vx_\nunlabeledsmall - \bar{\vx}_k}{}^2
\label{eqn:loss-centroids-proved}
\end{equation}