|
hp2FEM
0.1
|
Various routines with dealing with CSR matrices. More...
#include <GKlib.h>
Defines | |
| #define | OMPMINOPS 50000 |
Functions | |
| gk_csr_t * | gk_csr_Create () |
| void | gk_csr_Init (gk_csr_t *mat) |
| void | gk_csr_Free (gk_csr_t **mat) |
| void | gk_csr_FreeContents (gk_csr_t *mat) |
| gk_csr_t * | gk_csr_Dup (gk_csr_t *mat) |
| gk_csr_t * | gk_csr_ExtractSubmatrix (gk_csr_t *mat, int rstart, int nrows) |
| gk_csr_t * | gk_csr_ExtractRows (gk_csr_t *mat, int nrows, int *rind) |
| gk_csr_t * | gk_csr_ExtractPartition (gk_csr_t *mat, int *part, int pid) |
| gk_csr_t ** | gk_csr_Split (gk_csr_t *mat, int *color) |
| gk_csr_t * | gk_csr_Read (char *filename, int format, int readvals, int numbering) |
| void | gk_csr_Write (gk_csr_t *mat, char *filename, int format, int writevals, int numbering) |
| gk_csr_t * | gk_csr_Prune (gk_csr_t *mat, int what, int minf, int maxf) |
| gk_csr_t * | gk_csr_LowFilter (gk_csr_t *mat, int what, int norm, float fraction) |
| gk_csr_t * | gk_csr_TopKPlusFilter (gk_csr_t *mat, int what, int topk, float keepval) |
| gk_csr_t * | gk_csr_ZScoreFilter (gk_csr_t *mat, int what, float zscore) |
| void | gk_csr_CompactColumns (gk_csr_t *mat) |
| void | gk_csr_SortIndices (gk_csr_t *mat, int what) |
| void | gk_csr_CreateIndex (gk_csr_t *mat, int what) |
| void | gk_csr_Normalize (gk_csr_t *mat, int what, int norm) |
| void | gk_csr_Scale (gk_csr_t *mat, int type) |
| void | gk_csr_ComputeSums (gk_csr_t *mat, int what) |
| void | gk_csr_ComputeSquaredNorms (gk_csr_t *mat, int what) |
| float | gk_csr_ComputeSimilarity (gk_csr_t *mat, int i1, int i2, int what, int simtype) |
| int | gk_csr_GetSimilarRows (gk_csr_t *mat, int nqterms, int *qind, float *qval, int simtype, int nsim, float minsim, gk_fkv_t *hits, int *i_marker, gk_fkv_t *i_cand) |
Various routines with dealing with CSR matrices.
$Id: csr.c 10537 2011-07-11 05:27:54Z karypis $
| void gk_csr_CompactColumns | ( | gk_csr_t * | mat | ) |
Compacts the column-space of the matrix by removing empty columns. As a result of the compaction, the column numbers are renumbered. The compaction operation is done in place and only affects the row-based representation of the matrix.
| mat | the matrix whose empty columns will be removed. |
| float gk_csr_ComputeSimilarity | ( | gk_csr_t * | mat, |
| int | i1, | ||
| int | i2, | ||
| int | what, | ||
| int | simtype | ||
| ) |
Computes the similarity between two rows/columns
| mat | the matrix itself. The routine assumes that the indices are sorted in increasing order. |
| i1 | is the first row/column, |
| i2 | is the second row/column, |
| what | is either GK_CSR_ROW or GK_CSR_COL indicating the type of objects between the similarity will be computed, |
| simtype | is the type of similarity and is one of GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN |
| void gk_csr_ComputeSquaredNorms | ( | gk_csr_t * | mat, |
| int | what | ||
| ) |
Computes the squared of the norms of the rows/columns
| mat | the matrix itself, |
| what | is either GK_CSR_ROW or GK_CSR_COL indicating which squared norms to compute. |
| void gk_csr_ComputeSums | ( | gk_csr_t * | mat, |
| int | what | ||
| ) |
Computes the sums of the rows/columns
| mat | the matrix itself, |
| what | is either GK_CSR_ROW or GK_CSR_COL indicating which sums to compute. |
| gk_csr_t* gk_csr_Create | ( | ) |
Allocate memory for a CSR matrix and initializes it
| void gk_csr_CreateIndex | ( | gk_csr_t * | mat, |
| int | what | ||
| ) |
Creates a row/column index from the column/row data.
| mat | the matrix itself, |
| what | is either GK_CSR_ROW or GK_CSR_COL indicating which index will be created. |
| gk_csr_t* gk_csr_Dup | ( | gk_csr_t * | mat | ) |
Returns a copy of a matrix.
| mat | is the matrix to be duplicated. |
| gk_csr_t* gk_csr_ExtractPartition | ( | gk_csr_t * | mat, |
| int * | part, | ||
| int | pid | ||
| ) |
Returns a submatrix corresponding to a specified partitioning of rows.
| mat | is the original matrix. |
| part | is the partitioning vector of the rows. |
| pid | is the partition ID that will be extracted. |
| gk_csr_t* gk_csr_ExtractRows | ( | gk_csr_t * | mat, |
| int | nrows, | ||
| int * | rind | ||
| ) |
Returns a submatrix containing a certain set of rows.
| mat | is the original matrix. |
| nrows | is the number of rows to extract. |
| rind | is the set of row numbers to extract. |
| gk_csr_t* gk_csr_ExtractSubmatrix | ( | gk_csr_t * | mat, |
| int | rstart, | ||
| int | nrows | ||
| ) |
Returns a submatrix containint a set of consecutive rows.
| mat | is the original matrix. |
| rstart | is the starting row. |
| nrows | is the number of rows from rstart to extract. |
| void gk_csr_Free | ( | gk_csr_t ** | mat | ) |
Frees all the memory allocated for matrix.
| mat | is the matrix to be freed. |
| void gk_csr_FreeContents | ( | gk_csr_t * | mat | ) |
Frees only the memory allocated for the matrix's different fields and sets them to NULL.
| mat | is the matrix whose contents will be freed. |
| int gk_csr_GetSimilarRows | ( | gk_csr_t * | mat, |
| int | nqterms, | ||
| int * | qind, | ||
| float * | qval, | ||
| int | simtype, | ||
| int | nsim, | ||
| float | minsim, | ||
| gk_fkv_t * | hits, | ||
| int * | i_marker, | ||
| gk_fkv_t * | i_cand | ||
| ) |
Finds the n most similar rows (neighbors) to the query using cosine similarity.
| mat | the matrix itself |
| nqterms | is the number of columns in the query |
| qind | is the list of query columns |
| qval | is the list of correspodning query weights |
| simtype | is the type of similarity and is one of GK_CSR_COS, GK_CSR_JAC, GK_CSR_MIN |
| nsim | is the maximum number of requested most similar rows. If -1 is provided, then everything is returned unsorted. |
| minsim | is the minimum similarity of the requested most similar rows |
| hits | is the result set. This array should be at least of length nsim. |
| i_marker | is an array of size equal to the number of rows whose values are initialized to -1. If NULL is provided then this array is allocated and freed internally. |
| i_cand | is an array of size equal to the number of rows. If NULL is provided then this array is allocated and freed internally. |
| void gk_csr_Init | ( | gk_csr_t * | mat | ) |
Initializes the matrix
| mat | is the matrix to be initialized. |
| gk_csr_t* gk_csr_LowFilter | ( | gk_csr_t * | mat, |
| int | what, | ||
| int | norm, | ||
| float | fraction | ||
| ) |
Eliminates certain entries from the rows/columns of the matrix. The filtering takes place by keeping only the highest weight entries whose sum accounts for a certain fraction of the overall weight of the row/column.
| mat | the matrix to be prunned, |
| what | indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) of the matrix will be prunned, |
| norm | indicates the norm that will be used to aggregate the weights and possible values are 1 or 2, |
| fraction | is the fraction of the overall norm that will be retained by the kept entries. |
| void gk_csr_Normalize | ( | gk_csr_t * | mat, |
| int | what, | ||
| int | norm | ||
| ) |
Normalizes the rows/columns of the matrix to be unit length.
| mat | the matrix itself, |
| what | indicates what will be normalized and is obtained by specifying GK_CSR_ROW, GK_CSR_COL, GK_CSR_ROW|GK_CSR_COL. |
| norm | indicates what norm is to normalize to, 1: 1-norm, 2: 2-norm |
| gk_csr_t* gk_csr_Prune | ( | gk_csr_t * | mat, |
| int | what, | ||
| int | minf, | ||
| int | maxf | ||
| ) |
Prunes certain rows/columns of the matrix. The prunning takes place by analyzing the row structure of the matrix. The prunning takes place by removing rows/columns but it does not affect the numbering of the remaining rows/columns.
| mat | the matrix to be prunned, |
| what | indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) of the matrix will be prunned, |
| minf | is the minimum number of rows (columns) that a column (row) must be present in order to be kept, |
| maxf | is the maximum number of rows (columns) that a column (row) must be present at in order to be kept. |
| gk_csr_t* gk_csr_Read | ( | char * | filename, |
| int | format, | ||
| int | readvals, | ||
| int | numbering | ||
| ) |
Reads a CSR matrix from the supplied file and stores it the matrix's forward structure.
| filename | is the file that stores the data. |
| format | is either GK_CSR_FMT_CLUTO or GK_CSR_FMT_CSR specifying the type of the input format. The difference between the two formats is that GK_CSR_FMT_CLUTO requires the header line, whereas GK_CSR_FMT_CSR does not. |
| readvals | is either 1 or 0, indicating if the CSR file contains values or it does not. It only applies when GK_CSR_FMT_CSR is used. |
| numbering | is either 1 or 0, indicating if the numbering of the indices start from 1 or 0, respectively. If they start from 1, they are automatically decreamented during input so that they will start from 0. It only applies when GK_CSR_FMT_CSR is used. |
| void gk_csr_Scale | ( | gk_csr_t * | mat, |
| int | type | ||
| ) |
Applies different row scaling methods.
| mat | the matrix itself, |
| type | indicates the type of row scaling. Possible values are: GK_CSR_MAXTF, GK_CSR_SQRT, GK_CSR_LOG, GK_CSR_IDF. |
| void gk_csr_SortIndices | ( | gk_csr_t * | mat, |
| int | what | ||
| ) |
Sorts the indices in increasing order
| mat | the matrix itself, |
| what | is either GK_CSR_ROW or GK_CSR_COL indicating which set of indices to sort. |
| gk_csr_t** gk_csr_Split | ( | gk_csr_t * | mat, |
| int * | color | ||
| ) |
Splits the matrix into multiple sub-matrices based on the provided color array.
| mat | is the original matrix. |
| color | is an array of size equal to the number of non-zeros in the matrix (row-wise structure). The matrix is split into as many parts as the number of colors. For meaningfull results, the colors should be numbered consecutively starting from 0. |
| gk_csr_t* gk_csr_TopKPlusFilter | ( | gk_csr_t * | mat, |
| int | what, | ||
| int | topk, | ||
| float | keepval | ||
| ) |
Eliminates certain entries from the rows/columns of the matrix. The filtering takes place by keeping only the highest weight top-K entries along each row/column and those entries whose weight is greater than a specified value.
| mat | the matrix to be prunned, |
| what | indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) of the matrix will be prunned, |
| topk | is the number of the highest weight entries to keep. |
| keepval | is the weight of a term above which will be kept. This is used to select additional terms past the first topk. |
| void gk_csr_Write | ( | gk_csr_t * | mat, |
| char * | filename, | ||
| int | format, | ||
| int | writevals, | ||
| int | numbering | ||
| ) |
Writes the row-based structure of a matrix into a file.
| mat | is the matrix to be written, |
| filename | is the name of the output file. |
| format | is one of GK_CSR_FMT_CLUTO or GK_CSR_FMT_CSR specifying the format of the output file. |
| writevals | is either 1 or 0 indicating if the values will be written or not. This is only applicable when GK_CSR_FMT_CSR is used. |
| numbering | is either 1 or 0 indicating if the internal 0-based numbering will be shifted by one or not during output. This is only applicable when GK_CSR_FMT_CSR is used. |
| gk_csr_t* gk_csr_ZScoreFilter | ( | gk_csr_t * | mat, |
| int | what, | ||
| float | zscore | ||
| ) |
Eliminates certain entries from the rows/columns of the matrix. The filtering takes place by keeping only the terms whose contribution to the total length of the document is greater than a user-splied multiple over the average.
This routine assumes that the vectors are normalized to be unit length.
| mat | the matrix to be prunned, |
| what | indicates if the rows (GK_CSR_ROW) or the columns (GK_CSR_COL) of the matrix will be prunned, |
| zscore | is the multiplicative factor over the average contribution to the length of the document. |
1.7.6.1