Thursday, May 19, 2011

Create transition matrices by cohort approach and hazard rate approch

------------The cohort approach macro has a bug and I am working on it ------------------------------------
Cohort approach is a widely-used method in creating transition matrices for evaluation of credit risk. In the example below, I made a macro to automate this process. Although the code is for one-year period transition matrices, scorecards for multiple-year period can simply derive from it.
%macro cohort(filepath = );
   options mlogic mprint;
   data _tmp01;
      infile "&filepath\data.txt" delimiter='09'x missover dsd firstobs=2 ;
      informat id best12.; informat date mmddyy10.; informat rawrating $5.;
      input id date rawrating;
   run;
   proc format;
      value $rating 
      'NR   ' =   0
      'AAA'   =   1
      'AA+'   =   2
      'AA   ' =   2
      'AA-'   =   2
      'A+   ' =   3
      'A   '  =   3
      'A-   ' =   3
      'BBB+'  =   4
      'BBB'   =   4
      'BBB-'  =   4
      'BB+'   =   5
      'BB   ' =   5
      'BB-'   =   5
      'B+   ' =   6
      'B   '  =   6
      'B-   ' =   6
      'CCC+'  =   7
      'CCC'   =   7
      'CCC-'  =   7
      'CC   ' =   7
      'C   '  =   7
      'D   '  =   8
   ;;;
   run;
   data _tmp02;
      set _tmp01;
      year = year(date);
      rating = put(rawrating, $rating.);
      drop rawrating;
   run;

   proc sort data = _tmp02;
      by id year date;
   run;
   data _tmp03;
      set _tmp02;
      by id year;
      if last.year;
      drop date;
   run;
   proc sql;
      select min(year) into :y_start from _tmp03;
      select max(year)-1 into :y_end from _tmp03;
   quit;

   proc transpose data = _tmp03 out = _tmp04 prefix = year;
      by id;
      var rating;
      id year;
   run;
   data _tmp05;
      set _tmp04;
      %do num1 = &y_start %to &y_end;
         %let num2 = %eval(&num1 + 1);
         if year&num1 not in (., 0, 8) and year&num2 = . then year&num2 = year&num1;
      %end;
   run;

   proc transpose data = _tmp05 out = _tmp06;
      by id;
      var year:;
   run;
   proc sort data = _tmp06;
      by id _name_;
      where rating ne .;
   run;
   data _tmp07;
      set _tmp06;
      year = input(substr(_name_, 5, 4), 4.);
      drop _name_;
   run;

   proc sql;
      create table _tmp08 as
      select a.id, a.year as start_year, a.rating as start_rating, 
            b.year as end_year, b.rating as end_rating
      from _tmp07(where=(rating not in (0, 8))) as a, _tmp07 as b
      where a.id = b.id and b.year - a.year = 1
   ;quit;

   proc freq data = _tmp08;
      table start_rating * end_rating / nofreq nocol nopercent;
      ods output crosstabfreqs = _tmp09;
   run;
   data _tmp10;
      set _tmp09;
      length end_rating_char $3;
      end_rating_char = cats(end_rating, '');
      if end_rating = 0 then end_rating_char = 'NR';
      rowpercent = rowpercent / 100;
      where RowPercent is not missing;
   run;

   ods html file = "&filepath\tmatrice.xls" style = minimal;
   title; footnote;
   proc report data = _tmp10 nowd headline split = '|';
      col start_rating end_rating_char,RowPercent;
      define start_rating / group 'Rating at the|start of one year';
      define end_rating_char / across 'Rating at the end of one year';
      define rowpercent / '' format = percent8.2;
   run;
   ods html close;
%mend cohort;
For hazard rate approach, the generator matrix can be obtained by codes like:

data one;
    format date date9.;
    input Id Date : date9. Rating;
    datalines;
    1    30-May-00    7
    1    31-Dec-00    6
    2    21-May-03    1
    3    30-Dec-99    5
    3    30-Oct-00    6
    3    30-Dec-01    5
    4    30-Dec-01    5
    4    30-May-02    6
    5    24-May-00    2
    5    30-May-01    3
    5    30-Oct-01    2
    6    30-Dec-99    4
    6    30-Dec-01    4
    7    30-Dec-02    4
    7    23-Jun-03    5
    7    30-Dec-03    6
    7    21-May-04    5
    8    30-Dec-02    3
    9    21-May-00    2
    9    30-Dec-00    0
    10   30-Dec-04    5
    11  30-Dec-99    5
    11  30-Dec-01    6
    11  21-May-02    7
    11  30-Sep-02    8
    12  30-Dec-00    0
    13  30-Dec-99    4
    13  30-May-03    5
    14  21-Sep-99    5
    14  30-Dec-99    5
    14  30-Dec-01    5
    14  26-May-02    6
    14  21-May-04    5
    ;
run;

proc sort data = one;
   by id date;
run;

proc sql noprint;
   select max(date) into: maxdat
   from one;
quit;
data two(drop=nextid nextdat);
   merge one one(firstobs = 2 rename=(rating=nextrat id=nextid date=nextdat));
   if id ne nextid then nextrat = rating;
   if id = nextid then spell = nextdat - date;
   else spell = &maxdat - date;
   spell = spell / 365;
run;

proc sql;
   create table three as
   select a.*, a.count / b.sumspell as ratio
   from (select rating, nextrat, count(*) as count
           from two group by rating, nextrat) as a,
        (select rating, sum(spell) as sumspell
           from two group by rating) as b
   where a.rating = b.rating
;quit;

proc sql;
   create table four as
   select rating, rating as nextrat, sum(ratio) as sumratio
   from three
   where rating ne nextrat
   group by rating
;quit;

data five;
   merge three four;
   by rating nextrat;
   if rating = nextrat then ratio = -sumratio;
run;

options ls = 256 nocenter missing = 0;
proc report data = five nowd headline;
   title 'Generator maxtrix';
   columns rating nextrat,ratio ;
   define rating / group 'From';
   define nextrat / across 'To';
   define ratio / ' ' format = 8.2;
run;

Good math, bad engineering

As a formal statistician and a current engineer, I feel that a successful engineering project may require both the mathematician’s abilit...