#include "EXTERN.h" #include "perl.h" #include "XSUB.h" /* The Perl include files perl.h redefines malloc and free. Here, we need the * usual malloc and free, defined in stdlib.h. So we undefine the ones in * perl.h. */ #ifdef malloc #undef malloc #endif #ifdef free #undef free #endif #include #include #include "../src/cluster.h" /* ------------------------------------------------- * Using the warnings registry, check to see if warnings * are enabled for the Algorithm::Cluster module. */ static int warnings_enabled(pTHX) { dSP; I32 count; bool isEnabled; SV * mysv; ENTER ; SAVETMPS; PUSHMARK(SP) ; XPUSHs(sv_2mortal(newSVpv("Algorithm::Cluster",18))); PUTBACK ; count = perl_call_pv("warnings::enabled", G_SCALAR) ; if (count != 1) croak("No arguments returned from call_pv()\n") ; mysv = POPs; isEnabled = (bool) SvTRUE(mysv); PUTBACK ; FREETMPS ; LEAVE ; return isEnabled; } /* ------------------------------------------------- * Create a row of doubles, initialized to a value */ static double* malloc_row_dbl(pTHX_ int ncols, double val) { int j; double * row; row = malloc(ncols * sizeof(double) ); for (j = 0; j < ncols; j++) { row[j] = val; } return row; } /* ------------------------------------------------- * Only coerce to a double if we already know it's * an integer or double, or a string which is actually numeric. * Don't blindly run the macro SvNV, because that will coerce * a non-numeric string to be a double of value 0.0, * and we do not want that to happen, because if we test it again, * it will then appear to be a valid double value. */ static int extract_double_from_scalar(pTHX_ SV * mysv, double * number) { if (SvPOKp(mysv) && SvLEN(mysv)) { /* This function is not in the public perl API */ if (Perl_looks_like_number(aTHX_ mysv)) { *number = SvNV( mysv ); return 1; } else { return 0; } } else if (SvNIOK(mysv)) { *number = SvNV( mysv ); return 1; } else { return 0; } } /* ------------------------------------------------- * Convert a Perl 2D matrix into a 2D matrix of C doubles. * NOTE: on errors this function returns a value greater than zero. */ static double** parse_data(pTHX_ SV * matrix_ref) { AV * matrix_av; SV * row_ref; AV * row_av; SV * cell; int type, i, j, nrows, ncols, n; double** matrix; /* NOTE -- we will just assume that matrix_ref points to an arrayref, * and that the first item in the array is itself an arrayref. * The calling perl functions must check this before we get this pointer. * (It's easier to implement these checks in Perl rather than C.) * The value of perl_rows is now fixed. But the value of * rows will be decremented, if we skip any (invalid) Perl rows. */ matrix_av = (AV *) SvRV(matrix_ref); nrows = (int) av_len(matrix_av) + 1; if(nrows <= 0) { return NULL; /* Caller must handle this case!! */ } row_ref = *(av_fetch(matrix_av, (I32) 0, 0)); row_av = (AV *) SvRV(row_ref); ncols = (int) av_len(row_av) + 1; matrix = malloc(nrows*sizeof(double*)); /* ------------------------------------------------------------ * Loop once for each row in the Perl matrix, and convert it to * C doubles. */ for (i=0; i < nrows; i++) { row_ref = *(av_fetch(matrix_av, (I32) i, 0)); if(! SvROK(row_ref) ) { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Wanted array reference, but " "got a scalar. No row to process?\n", i); break; } row_av = (AV *) SvRV(row_ref); type = SvTYPE(row_av); /* Handle unexpected cases */ if(type != SVt_PVAV ) { /* Reference doesn't point to an array at all. */ if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Wanted array reference, but got " "a reference to something else (%d)\n", i, type); break; } n = (int) av_len(row_av) + 1; if (n != ncols) { /* All rows in the matrix should have the same * number of columns. */ if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Contains %d columns " "(expected %d)\n", i, n, ncols); break; } matrix[i] = malloc(ncols*sizeof(double)); /* Loop once for each cell in the row. */ for (j=0; j < ncols; j++) { double num; cell = *(av_fetch(row_av, (I32) j, 0)); if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d col %d: Value is not " "a number.\n", i, j); free(matrix[i]); /* not included below */ break; } matrix[i][j] = num; } /* End for (j=0; j < ncols; j++) */ if (j < ncols) break; } /* End for (i=0; i < nrows; i++) */ if (i < nrows) { /* encountered a break */ nrows = i; for (i = 0; i < nrows; i++) free(matrix[i]); free(matrix); matrix = NULL; } return matrix; } /* ------------------------------------------------- * Convert a Perl 2D matrix into a 2D matrix of C ints. * On errors this function returns a value greater than zero. */ static int** parse_mask(pTHX_ SV * matrix_ref) { AV * matrix_av; SV * row_ref; AV * row_av; SV * cell; int type, i, j, nrows, ncols, n; int** matrix; /* NOTE -- we will just assume that matrix_ref points to an arrayref, * and that the first item in the array is itself an arrayref. * The calling perl functions must check this before we get this pointer. * (It's easier to implement these checks in Perl rather than C.) * The value of perl_rows is now fixed. But the value of * rows will be decremented, if we skip any (invalid) Perl rows. */ matrix_av = (AV *) SvRV(matrix_ref); nrows = (int) av_len(matrix_av) + 1; if(nrows <= 0) { return NULL; /* Caller must handle this case!! */ } row_ref = *(av_fetch(matrix_av, (I32) 0, 0)); row_av = (AV *) SvRV(row_ref); ncols = (int) av_len(row_av) + 1; matrix = malloc(nrows * sizeof(int *) ); /* ------------------------------------------------------------ * Loop once for each row in the Perl matrix, and convert it to C ints. */ for (i=0; i < nrows; i++) { row_ref = *(av_fetch(matrix_av, (I32) i, 0)); if(! SvROK(row_ref) ) { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Wanted array reference, but " "got a scalar. No row to process?\n", i); break; } row_av = (AV *) SvRV(row_ref); type = SvTYPE(row_av); /* Handle unexpected cases */ if(type != SVt_PVAV ) { /* Reference doesn't point to an array at all. */ if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Wanted array reference, but got " "a reference to something else (%d)\n", i, type); break; } n = (int) av_len(row_av) + 1; if (n != ncols) { /* All rows in the matrix should have the same * number of columns. */ if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d: Contains %d columns " "(expected %d)\n", i, n, ncols); break; } matrix[i] = malloc(ncols * sizeof(int) ); /* Loop once for each cell in the row. */ for (j=0; j < ncols; ++j) { double num; cell = *(av_fetch(row_av, (I32) j, 0)); if(extract_double_from_scalar(aTHX_ cell,&num) <= 0) { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d col %d: Value is not " "a number.\n", i, j); free(matrix[i]); /* not included below */ break; } matrix[i][j] = (int) num; } /* End for (j=0; j < ncols; j++) */ if (j < ncols) break; } /* End for (i=0; i < nrows; i++) */ if (i < nrows) { /* break statement encountered */ nrows = i; for (i = 0; i < nrows; i++) free(matrix[i]); free(matrix); matrix = NULL; } return matrix; } /* ------------------------------------------------- * */ static void free_matrix_int(int ** matrix, int nrows) { int i; for(i = 0; i < nrows; ++i ) { free(matrix[i]); } free(matrix); } /* ------------------------------------------------- * */ static void free_matrix_dbl(double ** matrix, int nrows) { int i; for(i = 0; i < nrows; ++i ) { free(matrix[i]); } free(matrix); } /* ------------------------------------------------- * */ static void free_ragged_matrix_dbl(double ** matrix, int nrows) { int i; for(i = 1; i < nrows; ++i ) { free(matrix[i]); } free(matrix); } /* ------------------------------------------------- * For debugging */ static void print_matrix_dbl(pTHX_ double ** matrix, int rows, int columns) { int i,j; for (i = 0; i < rows; i++) { printf("Row %3d: ",i); for (j = 0; j < columns; j++) { printf(" %7.2f", matrix[i][j]); } printf("\n"); } } /* ------------------------------------------------- * For debugging */ static SV* format_matrix_dbl(pTHX_ double ** matrix, int rows, int columns) { int i,j; SV * output = newSVpv("", 0); for (i = 0; i < rows; i++) { sv_catpvf(output, "Row %3d: ", i); for (j = 0; j < columns; j++) { sv_catpvf(output, " %7.2f", matrix[i][j]); } sv_catpvf(output, "\n"); } return(output); } /* ------------------------------------------------- * Convert a Perl array into an array of doubles * On error, this function returns NULL. */ static double* malloc_row_perl2c_dbl (pTHX_ SV * input, int* np) { int i; AV* array = (AV *) SvRV(input); const int n = (int) av_len(array) + 1; double* data = malloc(n * sizeof(double)); /* Loop once for each item in the Perl array, and convert * it to a C double. */ for (i=0; i < n; i++) { double num; SV * mysv = *(av_fetch(array, (I32) i, (I32) 0)); if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) { data[i] = num; } else { /* Error reading data */ if (warnings_enabled(aTHX)) Perl_warn(aTHX_ "Error parsing array: item %d is not a number\n", i); free(data); return NULL; } } if(np) *np = n; return data; } /* ------------------------------------------------- * Convert a Perl array into an array of ints * On errors this function returns NULL. */ static int* malloc_row_perl2c_int (pTHX_ SV * input) { int i; AV* array = (AV *) SvRV(input); const int n = (int) av_len(array) + 1; int* data = malloc(n*sizeof(int)); if(!data) return NULL; /* Loop once for each item in the Perl array, * and convert it to a C double. */ for (i=0; i < n; i++) { double num; SV * mysv = *(av_fetch(array, (I32) i, (I32) 0)); if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) { data[i] = (int) num; } else { /* Check if the item is numeric */ if (warnings_enabled(aTHX)) Perl_warn(aTHX_ "Error when parsing array: item %d is" " not a number, skipping\n", i); free(data); return NULL; } } return data; } /* ------------------------------------------------- * Copy a Perl array into an array of ints. * If an error occurs, return 0; otherwise return 1. */ static int copy_row_perl2c_int (pTHX_ SV * input, int* output) { int i; AV* array = (AV *) SvRV(input); const int n = (int) av_len(array) + 1; /* Loop once for each item in the Perl array, * and convert it to a C double. */ for (i=0; i < n; i++) { double num; SV * mysv = *(av_fetch(array, (I32) i, (I32) 0)); if(extract_double_from_scalar(aTHX_ mysv,&num) > 0) { output[i] = (int) num; } else { /* Skip any items which are not numeric */ if (warnings_enabled(aTHX)) Perl_warn(aTHX_ "Error when parsing array: item %d is" " not a number\n", i); return 0; } } return 1; } /* ------------------------------------------------- * */ static SV * row_c2perl_dbl(pTHX_ double * row, int ncols) { int j; AV * row_av = newAV(); for(j=0; j= 0) free(p[i]); free(p); return 0; } for (j = 0; j < ncols; j++) p[i][j] = 1; } *mask = p; } /* We don't check data_ref because we expect the caller to check it */ *matrix = parse_data(aTHX_ data_ref); if(*matrix==NULL) { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Error parsing data matrix.\n"); return 0; } if(SvTYPE(SvRV(weight_ref)) == SVt_PVAV) { *weight = malloc_row_perl2c_dbl(aTHX_ weight_ref, NULL); if(!(*weight)) { Perl_warn(aTHX_ "Error reading weight array.\n"); return 0; } } else { *weight = malloc_row_dbl(aTHX_ nweights,1.0); } return 1; } static double** parse_distance(pTHX_ SV* matrix_ref, int nobjects) { int i,j; AV* matrix_av = (AV *) SvRV(matrix_ref); double** matrix = malloc(nobjects*sizeof(double*)); matrix[0] = NULL; for (i=1; i < nobjects; i++) { SV* row_ref = *(av_fetch(matrix_av, (I32) i, 0)); AV* row_av = (AV *) SvRV(row_ref); matrix[i] = malloc(i * sizeof(double)); /* Loop once for each cell in the row. */ for (j=0; j < i; j++) { double num; SV* cell = *(av_fetch(row_av, (I32) j, 0)); if(extract_double_from_scalar(aTHX_ cell,&num) > 0) { matrix[i][j] = num; } else { if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "Row %d col %d: Value is not " "a number.\n", i, j); break; } } } if (i < nobjects) { nobjects = i+1; for (i = 1; i < nobjects; i++) free(matrix[i]); free(matrix); matrix = NULL; } return matrix; } /******************************************************************************/ /** **/ /** XS code begins here **/ /** **/ /******************************************************************************/ /******************************************************************************/ MODULE = Algorithm::Cluster PACKAGE = Algorithm::Cluster PROTOTYPES: ENABLE SV * _hello() CODE: printf("Hello, world!\n"); RETVAL = newSVpv("Hello world!!\n", 0); OUTPUT: RETVAL int _readprint(input) SV * input; PREINIT: double ** matrix; /* two-dimensional matrix of doubles */ CODE: matrix = parse_data(aTHX_ input); if(matrix != NULL) { AV* matrix_av = (AV *) SvRV(input); SV * row_ref = *(av_fetch(matrix_av, (I32) 0, 0)); AV * row_av = (AV *) SvRV(row_ref); const int nrows = (int) av_len(matrix_av) + 1; const int ncols = (int) av_len(row_av) + 1; print_matrix_dbl(aTHX_ matrix,nrows,ncols); free_matrix_dbl(matrix,nrows); RETVAL = 1; } else { RETVAL = 0; } OUTPUT: RETVAL SV * _readformat(input) SV * input; PREINIT: double ** matrix; /* two-dimensional matrix of doubles */ CODE: matrix = parse_data(aTHX_ input); if(matrix != NULL) { AV* matrix_av = (AV *) SvRV(input); SV * row_ref = *(av_fetch(matrix_av, (I32) 0, 0)); AV * row_av = (AV *) SvRV(row_ref); const int nrows = (int) av_len(matrix_av) + 1; const int ncols = (int) av_len(row_av) + 1; RETVAL = format_matrix_dbl(aTHX_ matrix,nrows,ncols); free_matrix_dbl(matrix,nrows); } else { RETVAL = newSVpv("",0); } OUTPUT: RETVAL SV * _mean(input) SV * input; PREINIT: int array_length; double * data; /* one-dimensional array of doubles */ CODE: if(SvTYPE(SvRV(input)) != SVt_PVAV) { XSRETURN_UNDEF; } data = malloc_row_perl2c_dbl (aTHX_ input, &array_length); if (data) { RETVAL = newSVnv( mean(array_length, data) ); free(data); } else { RETVAL = newSVnv( 0.0 ); } OUTPUT: RETVAL SV * _median(input) SV * input; PREINIT: int array_length; double * data; /* one-dimensional array of doubles */ CODE: if(SvTYPE(SvRV(input)) != SVt_PVAV) { XSRETURN_UNDEF; } data = malloc_row_perl2c_dbl (aTHX_ input, &array_length); if(data) { RETVAL = newSVnv( median(array_length, data) ); free(data); } else { RETVAL = newSVnv( 0.0 ); } OUTPUT: RETVAL void _treecluster(nrows,ncols,data_ref,mask_ref,weight_ref,transpose,dist,method) int nrows; int ncols; SV * data_ref; SV * mask_ref; SV * weight_ref; int transpose; char * dist; char * method; PREINIT: Node* nodes; double * weight = NULL; double ** matrix = NULL; int ** mask = NULL; double ** distancematrix = NULL; const int ndata = transpose ? nrows : ncols; const int nelements = transpose ? ncols : nrows; PPCODE: /* ------------------------ * Don't check the parameters, because we rely on the Perl * caller to check most paramters. */ /* ------------------------ * Convert data and mask matrices and the weight array * from C to Perl. Also check for errors, and ignore the * mask or the weight array if there are any errors. */ if (is_distance_matrix(aTHX_ data_ref)) { distancematrix = parse_distance(aTHX_ data_ref, nelements); } else { malloc_matrices( aTHX_ weight_ref, &weight, ndata, data_ref, &matrix, mask_ref, &mask, nrows, ncols ); } /* ------------------------ * Run the library function */ nodes = treecluster(nrows, ncols, matrix, mask, weight, transpose, dist[0], method[0], distancematrix); /* ------------------------ * Check result to make sure we didn't run into memory problems */ if(!nodes) { /* treecluster failed due to insufficient memory */ if(warnings_enabled(aTHX)) Perl_warn(aTHX_ "treecluster failed due to insufficient memory.\n"); } else { /* ------------------------ * Convert generated C matrices to Perl matrices */ const int n = nelements-1; int i; SV* nodes_ref; SV* distances_ref; AV* matrix_av = newAV(); AV* row_av = newAV(); for(i=0; i