% function[xclean,Qa]=rmspikes(x,settings) %rmspikes: stand-alone program to remove spiked from an array % Identify and remove points N stds beyond the mean % from hydratools thumbringer.m % [xclean,Qa]=rmspikes(x,settings); % % Inputs % x = the dirty data set, a structure defined as % settings = the fine controls % settings.nsd = 2; % (default) num std. deviations that defines outliers % settings.rvalue = 'mean'; % replacement value to use instead of NaN % can be 'mean', 'median', 'interp' or a numeric value % make settings.rvalue=1e35 to set to missing value % % xclean = the fixed data set % Qa = the feedback on what happened % Qa.nbad = the number of bad points % Qa.delmean = the ratio of the means before and after points were replaced % Qa.delvar = the ratio of the variance before and after points were replaced % Qa.stdr = standard deviation of the residuals % % originally written by Rachel Horwitz and Marinna Martini as part of % hydratools % latest mods, etm 12/6/07 % These programs are intened for us in adjusting metadata terms in % netCDF files from the USGS CMGP Oceanographic time-series data % archive % % Program written Matlab 7.6.0.342 (R2008a) % Program ran on Linux PC with RHEL4 and on Windows XP PC. % % "Although this program has been used by the USGS, no warranty, % expressed or implied, is made by the USGS or the United States % Government as to the accuracy and functioning of the program % and related program material nor shall the fact of distribution % constitute any such warranty, and no responsibility is assumed % by the USGS in connection therewith." function [xclean,Qa] = rmspikes(x,settings) % check inputs if exist('settings', 'var'), if isfield(settings,'nsd'), nsd = settings.nsd; end if isfield(settings, 'rvalue'), rvalue = settings.rvalue; end end if exist('nsd','var')~=1, nsd = 2; end if exist('rvalue','var')~=1, rvalue = 'mean'; end Qa = []; warning off MATLAB:divideByZero; % make sure the shape is consistent nrows = size(x, 1); if nrows == 1, x = x'; end xclean = x; stdx = gstd(x); medx = gmedian(x); %12/07 this seems to work better - etm spikes = find(abs(x) > medx+nsd*stdx | abs(x) < medx-nsd*stdx); % end etm mods if ~isempty(spikes) % find the non-spike data and replace with that value ns=find(~(ismember([1:1:length(x)],spikes)) == 1); if ~ischar(rvalue), xclean(spikes) = ones(size(spikes)).*rvalue; elseif strcmp(rvalue, 'median'), xclean(spikes) = ones(size(spikes)).*gmedian(x(ns)); elseif strcmp(rvalue, 'mean'), xclean(spikes) = ones(size(spikes)).*gmean(x(ns)); else % almost interpolate if spikes(end) > length(x)-2 for j=1:length(spikes) if j