Monday, December 15, 2008

Vertical collapse by five methods

******************(1) INPUT STEP***********;
data have;
  input id: $ string: $;
  cards;
  001 aaa 
  001 bbb 
  002 ccccc 
  002 dddd 
  002 eee 
  003 ffff 
  004 gggggg
  ;
run;

*******************(2) CONCATENATION STEP ***********;
***********(2.1) METHOD I: do-loop and substr()***********;
data want1(drop = string);
    length newstring $50.; 
    do _n_ = 1 by 1 until(last.id);
        set have;
        by id notsorted;
        substr(newstring,length(newstring) + 1) = string;
    end;
run;

***********(2.2) METHOD II: Proc Transpose***********;
proc transpose data = have out = _tmp;
   by id;
   var string;
run;

data want2;
   set _tmp;
   newstring = cats(of col:);
   drop _: col:;
run;

***********(2.3) METHOD III: retain statement***********;
data want3(drop = string);
  set have;
  by id notsorted;
  length newstring $50.;
  retain newstring ;
  if first.id then newstring = string;
  else newstring = cats(newstring, string);
  if last.id;
run;

***********(2.4) METHOD IV: Hash table***********;
data _null_;
  length newstring $50;
  if _n_ =1 then do;
  declare hash h();
    h.defineKey('id');
    h.defineData('id', 'newstring');
    h.defineDone();
  end;
  set have end = eof ;
  if h.find() ne 0 then do;
    newstring = string;
    h.add();
  end;
  else do;
    newstring = cats(newstring, string);
    h.replace();
  end;
  if eof then h.output(dataset: 'want4');
run;

***********(2.5) METHOD V: SQL and macro***********;
proc sql noprint;
  select count(unique(id)) into: idnum 
  from have;
  select distinct id into: allid separated by ', ' 
  from have;
quit;

%macro concatenate();
  %let id = scan("&allid", &i);
  %do i = 1 %to &idnum;
    proc sql noprint;
      select string into: newstring separated by ''
      from have
      where id = &id;
    quit;
  %put &newstring;
  %end;
%mend;
%concatenate();

*********************END OF ALL CODING******************************;
References:
1. Technique board. Mysas.net.

Saturday, December 13, 2008

Proc GLM v.s. IML in ANOVA

data thrust;
    input speed100 speed250 speed400 speed550;
    if _n_ le 15 then feed=0.015;
    if _n_ le 10 then feed=0.010;
    if _n_ le 5 then feed=0.005;
    cards;
     121 98 83 58
     124 108 81 59
     104 87 88 60
     124 94 90 66
     110 91 86 56
     329 291 281 265
     331 265 278 265
     324 295 275 269
     338 288 276 260
     332 297 287 251
     640 569 551 487
     600 575 552 481
     612 565 570 487
     620 573 546 500
     623 588 569 497
    ;
run;

data thrust2;
    set thrust;
    array speedarray(4) speed:;
    do i = 1 to 4;
       force = speedarray(i);
       output;
    end;
    drop speed:;
run;

data thrust3;
    set thrust2;
    if i = 1 then speed = 100;
    if i = 2 then speed = 250;
    if i = 3 then speed = 400;
    if i = 4 then speed = 550;
    drop i;
run;

proc sort data=thrust3 out=thrust4;
    by feed speed;
run;

proc glm data=thrust3;
    title 'GLM solution';
    class feed speed;
    model force= feed speed feed*speed;
run;

proc iml;
    title 'IML solution';
    use thrust4;
    read all var{force} into y;
    Xzero=J(60,1);
    Xfeed=I(3)@j(4,1)@j(5,1);
    Xspeed=j(3,1)@I(4)@j(5,1);
    Xint=I(3)@I(4)@j(5,1);
    Xfull=Xzero||Xfeed||Xspeed||Xint;

    SSE=y`*y-y`*Xfull*ginv(Xfull`*Xfull)*Xfull`*y;
    SSTrtFeed=y`*((I(3)-(1/3)*J(3,3))@((1/4)*J(4,4))@((1/5)*J(5,5)))*y;
    SSTrtSpeed=y`*(((1/3)*J(3,3))@(I(4)-(1/4)*J(4,4))@((1/5)*J(5,5)))*y;
    SSInt=y`*((I(3)-(1/3)*J(3,3))@(I(4)-(1/4)*J(4,4))@((1/5)*J(5,5)))*y;
    SSTotal=y`*(I(60)-(1/60)*J(60,60))*y;

    print SSTrtFeed SSTrtSpeed SSInt SSE SSTotal;

    MSTrtFeed=SSTrtFeed/2; MSTrtSpeed=SSTrtSpeed/3; MSInt=SSInt/6; MSE=SSE/48; 
    print MSTrtFeed MSTrtSpeed MSInt MSE ;

    F_TrtFeed=MSTrtFeed/MSE; F_TrtSpeed=MSTrtSpeed/MSE; F_Int=MSInt/MSE;
    Print  F_TrtFeed F_TrtSpeed F_Int;

    P_TrtFeed=1-probf(F_TrtFeed,2,48); 
    P_TrtSpeed=1-probf(F_TrtSpeed,3,48); 
    P_Int=1-probf(F_Int,6,48);
    Print P_TrtFeed P_TrtSpeed P_Int;
run;quit;

Wednesday, December 3, 2008

Some tips about Proc Printto and SAS memory

Run SAS Proc Options would have the answers.
/* Specifies the limit on the total amount of memory to be used by the SAS Syste*/
proc options option=MEMSIZE; run;
/*Upper limit for data-dependent memory usage during summarization*/
proc options option=SUMSIZE; run;
/*  Upper limit for memory during sorting*/
proc options option=SORTSIZE; run;


/*Output saslog and saslist to specified locations*/
proc printto log="C:\user\myname\mylog.log" print="C:\user\myname\mylist.lst" new;
run;
/*Open the files with notpads directly*/
proc printto log="C:\user\myname\mylog.txt" print="C:\user\myname\mylist.txt" new;
run;
/*A method to suppress log generation during execuation, especially for simulation*/
filename supress dummy;
proc printto log=supress;
run;
/*Reopen the default setting*/
proc printto;
run;

Good math, bad engineering

As a formal statistician and a current engineer, I feel that a successful engineering project may require both the mathematician’s abilit...