## Copyright (C) 2012-2015 Nir Krakauer
##
## This function is free software; you can redistribute it and/or modify it
## under the terms of the GNU General Public License as published by
## the Free Software Foundation; either version 3 of the License, or (at
## your option) any later version.
##
## This function is distributed in the hope that it will be useful, but
## WITHOUT ANY WARRANTY; without even the implied warranty of
## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
## General Public License, , for more details.
## -*- texinfo -*-
## @deftypefn {Function File} @var{A} = svd_impute(@var{A_orig})
## Fill missing values in an @var{m} by @var{n} matrix @var{A_orig}, with @code{m >= n}, through iterative singular value decomposition. Returns the filled-in matrix.
##
## Reference: Nir Y. Krakauer (2012), Estimating Climate Trends: Application to United States Plant Hardiness Zones, Advances in Meteorology, 404876, doi: 10.1155/2012/404876
## @end deftypefn
## Author: Nir Krakauer
## Description: imputation of singular values by iterative singular value decomposition
function A = svd_impute(A_orig)
A = A_orig;
[m, n] = size(A);
[A,mu_total] = center(A, 2);
max_A_orig = max(A(:));
min_A_orig = min(A(:));
%fill any missing values with the average trend using available values
A(isnan(A)) = mean(A, 1)(ones(m, 1), :)(isnan(A));
dist = 1;
dist_tol = 1E-3;
iter = 0;
iters_max = 100;
while dist > dist_tol && iter < iters_max
%SVD
[U, S, V] = svd(A, 0);
%recompute V based only on non-missing values
%observe that if A is of full rank, V = A'*U*inv(S)
%V = s(1:k)
V_new = V;
for i = 1:n
ii = isfinite(A_orig(:, i));
# try
V_new(i, :) = (U(ii, :) * S) \ A(ii, i);
# catch
# keyboard
# end_try_catch
end
V_new(isnan(V_new)) = 0;
%distance between A and A_new
%dist = norm(V - V_new) / norm(V_new)
A_old = A + mu_total(:, ones(n, 1));
%newly filled-in matrix
A = U * S * V_new';
A(A > max_A_orig) = max_A_orig; %limiting the size of elements helps stability
A(A < min_A_orig) = min_A_orig;
A = A + mu_total(:, ones(n, 1));
ii = isfinite(A_orig);
A(ii) = A_orig(ii);
ii = ~ii;
dist = norm(A(ii) - A_old(ii)) / norm(A(ii))
%re-center A
[A,mu_total] = center(A, 2);
%max(abs(A(:)))
%sum(abs(A(:)) < 1)/numel(A)
iter = iter + 1;
end
%add the average back
A = A + mu_total(:, ones(n, 1));
disp([num2str(iter) ' iterations']) %display number of iterations