/* File to illustrate how well the LOESS smooth recaptures a known variance function,

   when data sets with different sample sizes are sampled. */

 

%let std_dev = 10 + 2*X;   /* Change this to anything you want, as long as it is a positive function over the

                            range -5 < x < 5.   Make it a constant, like  "std_dev = 10;" to see how

                            the method works in the case of homoscedasticity.   */

%let n = 200;             /* Change this one too. You should see the LOESS curve recover     

                                   the true curve better when n is larger. */

 

%let beta0 = 1; %let beta1 = 1;  /* These parameter settings make no difference here. */


title
"True standard deviation function is &std_dev;  n = &n";

data simu;

   do i = 1 to &n;

       x = rannor(0);

       epsilon = (&std_dev)*rannor(0);    

       y = &beta0 + &beta1*x + epsilon;

       output;

    end;

run;

 

proc reg data=simu;

   model y=x;

   output out=new residual=e;

run;

 

data new;

   set new;

   normed_absolute_residual = 1.253314*abs(e);  /* The constant 1.253314 is not essential. But since StdDev(Z) = 1.253314*E(|Z|), it lets you recover the precise Std Dev function. */

   true_std_dev = &std_dev;

run;

 

proc sort data=new;

   by x;
run;

 

proc sgplot data=new;

    scatter y=normed_absolute_residual x=x;

    loess y=normed_absolute_residual x=x /nomarkers;

    series y = true_std_dev x = x;

run;

 

title;