## Copyright (C) 2012-2015 Nir Krakauer ## ## This function is free software; you can redistribute it and/or modify it ## under the terms of the GNU General Public License as published by ## the Free Software Foundation; either version 3 of the License, or (at ## your option) any later version. ## ## This function is distributed in the hope that it will be useful, but ## WITHOUT ANY WARRANTY; without even the implied warranty of ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ## General Public License, , for more details. ## -*- texinfo -*- ## @deftypefn {Function File} @var{A} = svd_impute(@var{A_orig}) ## Fill missing values in an @var{m} by @var{n} matrix @var{A_orig}, with @code{m >= n}, through iterative singular value decomposition. Returns the filled-in matrix. ## ## Reference: Nir Y. Krakauer (2012), Estimating Climate Trends: Application to United States Plant Hardiness Zones, Advances in Meteorology, 404876, doi: 10.1155/2012/404876 ## @end deftypefn ## Author: Nir Krakauer ## Description: imputation of singular values by iterative singular value decomposition function A = svd_impute(A_orig) A = A_orig; [m, n] = size(A); [A,mu_total] = center(A, 2); max_A_orig = max(A(:)); min_A_orig = min(A(:)); %fill any missing values with the average trend using available values A(isnan(A)) = mean(A, 1)(ones(m, 1), :)(isnan(A)); dist = 1; dist_tol = 1E-3; iter = 0; iters_max = 100; while dist > dist_tol && iter < iters_max %SVD [U, S, V] = svd(A, 0); %recompute V based only on non-missing values %observe that if A is of full rank, V = A'*U*inv(S) %V = s(1:k) V_new = V; for i = 1:n ii = isfinite(A_orig(:, i)); # try V_new(i, :) = (U(ii, :) * S) \ A(ii, i); # catch # keyboard # end_try_catch end V_new(isnan(V_new)) = 0; %distance between A and A_new %dist = norm(V - V_new) / norm(V_new) A_old = A + mu_total(:, ones(n, 1)); %newly filled-in matrix A = U * S * V_new'; A(A > max_A_orig) = max_A_orig; %limiting the size of elements helps stability A(A < min_A_orig) = min_A_orig; A = A + mu_total(:, ones(n, 1)); ii = isfinite(A_orig); A(ii) = A_orig(ii); ii = ~ii; dist = norm(A(ii) - A_old(ii)) / norm(A(ii)) %re-center A [A,mu_total] = center(A, 2); %max(abs(A(:))) %sum(abs(A(:)) < 1)/numel(A) iter = iter + 1; end %add the average back A = A + mu_total(:, ones(n, 1)); disp([num2str(iter) ' iterations']) %display number of iterations