diff --git a/data/rpq_data/c.mtx b/data/rpq_data/c.mtx new file mode 100644 index 0000000000..6f7121ec43 --- /dev/null +++ b/data/rpq_data/c.mtx @@ -0,0 +1,17 @@ +%%MatrixMarket matrix coordinate pattern general$ +%%GraphBLAS type bool$ +8 8 14 +1 2 +1 4 +1 7 +2 3 +2 8 +3 1 +3 6 +4 5 +5 2 +5 8 +6 4 +7 1 +7 6 +8 3 diff --git a/experimental/algorithm/LAGraph_RPQMatrix.c b/experimental/algorithm/LAGraph_RPQMatrix.c new file mode 100644 index 0000000000..91be10ac81 --- /dev/null +++ b/experimental/algorithm/LAGraph_RPQMatrix.c @@ -0,0 +1,515 @@ +//------------------------------------------------------------------------------ +// LAGraph_RPQMatrix: regular path query algortithm +//------------------------------------------------------------------------------ +// +// LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause + +// Contributed by Rodion Suvorov, Semyon Grigoriev, St. Petersburg State +// University. + +//------------------------------------------------------------------------------ + +// Code is based on the algorithm described in the following paper: +// * Diego Arroyuelo, Adrián Gómez-Brandón & Gonzalo Navarro "Evaluating +// regular path queries on compressed adjacency matrices" +// * URL: https://link.springer.com/article/10.1007/s00778-024-00885-6 + +//------------------------------------------------------------------------------ +// LAGraph_RPQMatrix: regular path query algortithm +// +// For an edge-labelled directed graph the algorithm computes the nubmer of +// nonzero elements in its reachability matrix. +// The reachability matrix created by following rules: +// * A[i,j] = True if node with index j is reachable from node with index i +// and concatenation of labels over path between these two labels is a word +// from specified regular language. +// * A[i,j] = False in other cases. +// +// The algorithm is based on the idea of ​​considering a regular constraint as +// an abstract syntax tree, the leaves of which are matrices of adjacency matrix +// decomposition of the graph, and the internal nodes are the operations of +// conjunction, concatenation, etc. +// +// Example of adjacency matrix decomposition: +// +// Graph: +// (0) --[a]-> (1) +// | ^ +// [b] [c]--/ +// | --/ +// v / +// (2) --[b]-> (3) +// +// Adjacency matrix decomposition of this graph consists of: +// * Adjacency matrix for the label a: +// 0 1 2 3 +// 0 | | T | | | +// 1 | | | | | +// 2 | | | | | +// 3 | | | | | +// * Adjacency matrix for the label b: +// 0 1 2 3 +// 0 | | | T | | +// 1 | | | | | +// 2 | | | | T | +// 3 | | | | | +// * Adjacency matrix for the label c: +// 0 1 2 3 +// 0 | | | | | +// 1 | | | | | +// 2 | | T | | | +// 3 | | | | | +// +// The algorithm recursively starts from the root of the given tree and +// performs the operations corresponding to each node on the children of that +// node. As a result of the algorithm's execution, the reachability +// matrix will be stored at the root. +// +// Example of regular expression and its corresponding AST: +// +// Regular expression: +// a/(b|c)* +// +// Abstract syntax tree: +// ┌─┐ +// │/| (3) +// └┬┘ +// ┌─┬─┴─┬─┐ +// │a│ │*│ (2) +// └─┘ └┬┘ +// ┌┴┐ +// │|│ (1) +// └┬┘ +// ┌─┬─┴─┬─┐ +// │b│ │c│ +// └─┘ └─┘ +// The numbers next to the graph nodes show the order in which operations are +// executed. For the decomposition and AST specified above, the resulting +// matrix will have the following structure (Note, that * represents the +// reflexive-transitive closure): +// +// 0 1 2 3 +// 0 | | T | | | +// 1 | | | | | +// 2 | | | | | +// 3 | | | | | +// +// So for this example LAGraph_RPQMatrix will return 1. +// +// Full description available at: +// https://arxiv.org/pdf/2307.14930 + +#define LG_FREE_WORK \ + { \ + } + +#define LG_FREE_ALL \ + { \ + LG_FREE_WORK ; \ + } + +#include "LG_internal.h" +#include "LAGraphX.h" +#include + +#define OK(s) \ +{ \ + GrB_Info info = (s); \ + if (info != GrB_SUCCESS) \ + { \ + fprintf(stderr, "GraphBLAS error: %d\n", info); \ + return info; \ + } \ +} + + +#include +#include + +GrB_Info LAGraph_RPQMatrix_check(RPQMatrixPlan *plan, GrB_Index *dimension, char *msg) +{ + if (plan == NULL) + { + return GrB_SUCCESS ; + } + if (plan->op == RPQ_MATRIX_OP_LABEL) + { + LG_ASSERT(plan->mat != NULL, GrB_NULL_POINTER); + GrB_Index nrows, ncols ; + OK(GrB_Matrix_nrows(&nrows, plan->mat)) ; + OK(GrB_Matrix_ncols(&ncols, plan->mat)) ; + if (*dimension == -1) + { + LG_ASSERT_MSG(nrows == ncols, GrB_INVALID_VALUE, + "all the matrices in the graph adjacency matrix decomposition " + "should have the same dimensions and be square") ; + *dimension = ncols ; + } + else + { + LG_ASSERT_MSG(nrows == *dimension && ncols == *dimension, GrB_INVALID_VALUE, + "all the matrices in the graph adjacency matrix decomposition " + "should have the same dimensions and be square") ; + } + + return GrB_SUCCESS ; + } + GrB_Info lstatus = LAGraph_RPQMatrix_check(plan->lhs, dimension, msg) ; + GrB_Info rstatus = LAGraph_RPQMatrix_check(plan->rhs, dimension, msg) ; + if (rstatus || lstatus) + { + return GrB_INVALID_VALUE ; + } + return GrB_SUCCESS ; +} + +static GrB_Semiring sr ; +static GrB_Monoid op ; + +GrB_Info LAGraph_RPQMatrix_solver(RPQMatrixPlan *plan, char *msg) ; + +static GrB_Info LAGraph_RPQMatrixLor(RPQMatrixPlan *plan, char *msg) +{ + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(plan->op == RPQ_MATRIX_OP_LOR, GrB_INVALID_VALUE) ; + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + RPQMatrixPlan *lhs = plan->lhs ; + RPQMatrixPlan *rhs = plan->rhs ; + + LG_ASSERT(lhs != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(rhs != NULL, GrB_NULL_POINTER) ; + + OK(LAGraph_RPQMatrix_solver(lhs, msg)) ; + OK(LAGraph_RPQMatrix_solver(rhs, msg)) ; + + LG_ASSERT(rhs->mat != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(lhs->mat != NULL, GrB_NULL_POINTER) ; + + GrB_Matrix lhs_mat = lhs->res_mat ; + GrB_Matrix rhs_mat = rhs->res_mat ; + + GrB_Index dimension ; + GrB_Matrix_ncols(&dimension, lhs_mat) ; + GrB_Matrix res ; + GrB_Matrix_new(&res, GrB_BOOL, dimension, dimension) ; + GRB_TRY(GrB_eWiseAdd(res, GrB_NULL, GrB_NULL, + op, lhs_mat, rhs_mat, GrB_DESC_R)) ; + plan->res_mat = res ; + + return (GrB_SUCCESS) ; +} + +static GrB_Info LAGraph_RPQMatrixConcat(RPQMatrixPlan *plan, char *msg) +{ + + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(plan->op == RPQ_MATRIX_OP_CONCAT, GrB_INVALID_VALUE) ; + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + RPQMatrixPlan *lhs = plan->lhs ; + RPQMatrixPlan *rhs = plan->rhs ; + + LG_ASSERT(lhs != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(rhs != NULL, GrB_NULL_POINTER) ; + + OK(LAGraph_RPQMatrix_solver(lhs, msg)) ; + OK(LAGraph_RPQMatrix_solver(rhs, msg)) ; + + GrB_Matrix lhs_mat = lhs->res_mat ; + GrB_Matrix rhs_mat = rhs->res_mat ; + + GrB_Index dimension ; + GrB_Matrix_ncols(&dimension, lhs_mat) ; + GrB_Matrix res ; + GrB_Matrix_new(&res, GrB_BOOL, dimension, dimension) ; + GRB_TRY(GrB_mxm(res, GrB_NULL, GrB_NULL, + sr, lhs_mat, rhs_mat, GrB_DESC_R)) ; + plan->res_mat = res ; + + return (GrB_SUCCESS) ; +} + +static GrB_Info LAGraph_RPQMatrixKleene(RPQMatrixPlan *plan, char *msg) +{ + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(plan->op == RPQ_MATRIX_OP_KLEENE, GrB_INVALID_VALUE) ; + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + RPQMatrixPlan *lhs = plan->lhs ; + RPQMatrixPlan *rhs = plan->rhs ; + + // Kleene star should have one child. Always right. + LG_ASSERT(lhs == NULL, GrB_INVALID_VALUE) ; + LG_ASSERT(rhs != NULL, GrB_NULL_POINTER) ; + + OK(LAGraph_RPQMatrix_solver(rhs, msg)) ; + + GrB_Matrix B = rhs->res_mat ; + // S <- I + GrB_Matrix S ; + + // Creating identity matrix. + GrB_Index n ; + GRB_TRY(GrB_Matrix_nrows(&n, B)) ; + GrB_Matrix I ; + GRB_TRY(GrB_Matrix_new(&I, GrB_BOOL, n, n)) ; + + GrB_Vector v ; + GRB_TRY(GrB_Vector_new(&v, GrB_BOOL, n)) ; + GRB_TRY(GrB_Vector_assign_BOOL(v, NULL, NULL, true, GrB_ALL, n, NULL)) ; + + GRB_TRY(GrB_Matrix_diag(&S, v, 0)) ; + + GRB_TRY(GrB_Vector_free(&v)) ; + + bool changed = true ; + GrB_Index nnz_S = n, nnz_Sold = 0 ; + + while (changed) + { + // S <- S x (B + I) + GRB_TRY(GrB_mxm(S, S, GrB_NULL, + sr, S, B, GrB_DESC_C)) ; + + GRB_TRY(GrB_Matrix_nvals(&nnz_S, S)) ; + if (nnz_S != nnz_Sold) + { + changed = true ; + nnz_Sold = nnz_S ; + } + else + { + changed = false ; + } + } + plan->res_mat = S ; + + GRB_TRY(GrB_Matrix_free(&I)) ; + return (GrB_SUCCESS) ; +} + +// this function need to handle special case where some optimization +// are available. +// +// consider following AST: +// ┌─┐ +// │/| +// └┬┘ +// ┌─┬─┴─┬─┐ +// │*│ │b│ +// └┬┘ └─┘ +// ┌┴┐ +// │a│ +// └─┘ +// If matrix B is sparse and A is dense, then instead of naive +// way: +// +// (I + A + A x A + ...) x B +// +// we can do: +// +// (B + A x B + A x A x B + ...) +// +// and AST should be rewritten in the following way: +// ┌───┐ +// │L^*│ +// └─┬─┘ +// ┌─┬─┴─┬─┐ +// │a│ │b│ +// └─┘ └─┘ +static GrB_Info LAGraph_RPQMatrixKleene_L(RPQMatrixPlan *plan, char *msg) +{ + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(plan->op == RPQ_MATRIX_OP_KLEENE_L, GrB_INVALID_VALUE) ; + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + RPQMatrixPlan *lhs = plan->lhs ; // A + RPQMatrixPlan *rhs = plan->rhs ; // B + + LG_ASSERT(lhs != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(rhs != NULL, GrB_NULL_POINTER) ; + + OK(LAGraph_RPQMatrix_solver(lhs, msg)) ; + OK(LAGraph_RPQMatrix_solver(rhs, msg)) ; + + GrB_Matrix A = lhs->res_mat ; + GrB_Matrix B = rhs->res_mat ; + + // S <- B + GrB_Matrix S ; + GRB_TRY(GrB_Matrix_dup(&S, B)) ; + + bool changed = true ; + GrB_Index nnz_S = 0, nnz_Sold = 0 ; + + while (changed) + { + // S <- (A + I) x S + GRB_TRY(GrB_mxm(S, S, NULL, sr, A, S, GrB_DESC_C)) ; + + GRB_TRY(GrB_Matrix_nvals(&nnz_S, S)) ; + if (nnz_S != nnz_Sold) + { + changed = true ; + nnz_Sold = nnz_S ; + } + else + { + changed = false ; + } + } + + plan->res_mat = S ; + return GrB_SUCCESS ; +} + +// this function need to handle special case where some optimization +// are available. +// consider following AST: +// ┌─┐ +// │/| +// └┬┘ +// ┌─┬─┴─┬─┐ +// │a│ │*│ +// └─┘ └┬┘ +// ┌┴┐ +// │b│ +// └─┘ +// If matrix A is sparse and B is dense, then instead of naive +// way: +// +// A x (I + B + B x B + ...) +// +// we can do: +// +// (A + A x B + A x B x B + ...) +// +// and AST should be rewritten in the following way: +// ┌───┐ +// │R^*│ +// └─┬─┘ +// ┌─┬─┴─┬─┐ +// │a│ │b│ +// └─┘ └─┘ + +static GrB_Info LAGraph_RPQMatrixKleene_R(RPQMatrixPlan *plan, char *msg) +{ + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(plan->op == RPQ_MATRIX_OP_KLEENE_R, GrB_INVALID_VALUE) ; + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + RPQMatrixPlan *lhs = plan->lhs ; // A + RPQMatrixPlan *rhs = plan->rhs ; // B + + LG_ASSERT(lhs != NULL, GrB_NULL_POINTER) ; + LG_ASSERT(rhs != NULL, GrB_NULL_POINTER) ; + + OK(LAGraph_RPQMatrix_solver(lhs, msg)) ; + OK(LAGraph_RPQMatrix_solver(rhs, msg)) ; + + GrB_Matrix A = lhs->res_mat ; + GrB_Matrix B = rhs->res_mat ; + + // S <- A + GrB_Matrix S ; + GRB_TRY(GrB_Matrix_dup(&S, A)) ; + + bool changed = true ; + GrB_Index nnz_S = 0, nnz_Sold = 0 ; + + while (changed) + { + // S <- S x (B + I) + GRB_TRY(GrB_mxm(S, S, NULL, sr, S, B, GrB_DESC_C)) ; + + GRB_TRY(GrB_Matrix_nvals(&nnz_S, S)) ; + if (nnz_S != nnz_Sold) + { + changed = true ; + nnz_Sold = nnz_S ; + } + else + { + changed = false ; + } + } + + plan->res_mat = S ; + return GrB_SUCCESS ; +} + + + +GrB_Info LAGraph_RPQMatrix_solver(RPQMatrixPlan *plan, char *msg) +{ + LG_ASSERT(plan->res_mat == NULL, GrB_INVALID_VALUE) ; + + switch (plan->op) + { + case RPQ_MATRIX_OP_LABEL: + LG_ASSERT_MSG(plan->lhs == NULL && plan->rhs == NULL, + GrB_INVALID_VALUE, "label node should not have any children nodes") ; + plan->res_mat = plan->mat ; + return (GrB_SUCCESS) ; + case RPQ_MATRIX_OP_LOR: + return LAGraph_RPQMatrixLor(plan, msg) ; + case RPQ_MATRIX_OP_CONCAT: + return LAGraph_RPQMatrixConcat(plan, msg) ; + case RPQ_MATRIX_OP_KLEENE: + return LAGraph_RPQMatrixKleene(plan, msg) ; + case RPQ_MATRIX_OP_KLEENE_L: + return LAGraph_RPQMatrixKleene_L(plan, msg) ; + case RPQ_MATRIX_OP_KLEENE_R: + return LAGraph_RPQMatrixKleene_R(plan, msg) ; + default: + LG_ASSERT_MSG(false, GrB_INVALID_VALUE, "invalid graph node type") ; + } + return (GrB_SUCCESS) ; +} + +GrB_Info LAGraph_RPQMatrix_initialize(void) +{ + sr = GxB_ANY_PAIR_BOOL ; + op = GxB_ANY_BOOL_MONOID ; + return GrB_SUCCESS; +} + +GrB_Info LAGraph_RPQMatrix( + // output: + GrB_Index *nnz, // number of nonzero values in + // result reachability matrix + + // input: + RPQMatrixPlan *plan, // root of abstarct syntax tree of + // regular expression + char *msg // LAGraph output message +) +{ + + //-------------------------------------------------------------------------- + // check inputs + //-------------------------------------------------------------------------- + + LG_CLEAR_MSG ; + LG_ASSERT(plan != NULL, GrB_NULL_POINTER) ; + GrB_Index dimension = -1 ; + GrB_Info info = LAGraph_RPQMatrix_check(plan, &dimension, msg) ; + LG_ASSERT_MSG(info == GrB_SUCCESS, info, msg) ; + + //-------------------------------------------------------------------------- + // initialize + //-------------------------------------------------------------------------- + + LAGraph_RPQMatrix_initialize() ; + + //-------------------------------------------------------------------------- + // run solver + //-------------------------------------------------------------------------- + + info = LAGraph_RPQMatrix_solver(plan, msg) ; + LG_ASSERT_MSG(info == GrB_SUCCESS, info, msg) ; + GrB_Matrix_nvals(nnz, plan->res_mat) ; + return GrB_SUCCESS ; +} diff --git a/experimental/test/test_RPQMatrix.c b/experimental/test/test_RPQMatrix.c new file mode 100644 index 0000000000..738418ff8f --- /dev/null +++ b/experimental/test/test_RPQMatrix.c @@ -0,0 +1,481 @@ +//------------------------------------------------------------------------------ +// LAGraph/experimental/test/test_RPQMatrix.c: test cases for RPQ-matrix +// reachability algorithm +//------------------------------------------------------------------------------ +// +// LAGraph, (c) 2019-2024 by The LAGraph Contributors, All Rights Reserved. +// SPDX-License-Identifier: BSD-2-Clause + +// Contributed by Rodion Suvorov, Semyon Grigoriev, St. Petersburg State +// University. + +//------------------------------------------------------------------------------ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define LEN 512 +char msg[LAGRAPH_MSG_LEN] ; + +static void load_matrix(GrB_Matrix *M, const char *name) +{ + char filename[LEN + 1] ; + snprintf(filename, LEN, LG_DATA_DIR "%s", name) ; + FILE *f = fopen(filename, "r") ; + TEST_CHECK(f != NULL) ; + OK(LAGraph_MMRead(M, f, msg)) ; + fclose(f) ; +} + +static char *matrix_to_str(GrB_Matrix M) +{ + GrB_Index nnz = 0 ; + OK(GrB_Matrix_nvals(&nnz, M)) ; + + GrB_Index *I = NULL ; + GrB_Index *J = NULL ; + bool *X = NULL ; + + OK(LAGraph_Malloc((void **)&I, nnz, sizeof(GrB_Index), msg)) ; + OK(LAGraph_Malloc((void **)&J, nnz, sizeof(GrB_Index), msg)) ; + OK(LAGraph_Malloc((void **)&X, nnz, sizeof(bool), msg)) ; + + OK(GrB_Matrix_extractTuples_BOOL(I, J, X, &nnz, M)) ; + + size_t bufsize = 11 * nnz + 1 ; + char *buf = NULL ; + OK(LAGraph_Malloc((void **)&buf, bufsize, sizeof(char), msg)) ; + buf[0] = '\0' ; + + for (size_t k = 0; k < nnz; k++) + { + char tmp[64] ; + snprintf(tmp, sizeof(tmp), k == 0 ? "(%" PRIu64 ", %" PRIu64 ")" : " (%" PRIu64 ", %" PRIu64 ")", I[k] + 1, J[k] + 1) ; + strcat(buf, tmp) ; + } + + LAGraph_Free((void **)&I, msg) ; + LAGraph_Free((void **)&J, msg) ; + LAGraph_Free((void **)&X, msg) ; + + return buf ; +} + +static void check_result_matrix(GrB_Matrix result, const char *expected) +{ + char *actual = matrix_to_str(result) ; + TEST_CHECK(strcmp(actual, expected) == 0) ; + if (strcmp(actual, expected) != 0) + { + TEST_MSG("Expected: %s\nActual: %s", expected, actual) ; + } + LAGraph_Free((void **)&actual, msg) ; +} + +//======================== +// Tests with valid result +//======================== + +// a/b +void test_RPQMatrix_CONCAT(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B ; + load_matrix(&A, "rpq_data/a.mtx") ; + load_matrix(&B, "rpq_data/b.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan concat = { + .op = RPQ_MATRIX_OP_CONCAT, + .lhs = &planA, + .rhs = &planB} ; + GrB_Index result_nnz ; + OK(LAGraph_RPQMatrix(&result_nnz, &concat, msg)) ; + + const char *expected = "(1, 5) (1, 7) (2, 4) (2, 6) (3, 3) (7, 3)" ; + check_result_matrix(concat.res_mat, expected) ; + TEST_CHECK(result_nnz == 6) ; + + GrB_Matrix_free(&A) ; + GrB_Matrix_free(&B) ; + LAGraph_Finalize(msg) ; +} + +// a|b +void test_RPQMatrix_LOR(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B ; + load_matrix(&A, "rpq_data/a.mtx") ; + load_matrix(&B, "rpq_data/b.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan lor = { + .op = RPQ_MATRIX_OP_LOR, + .lhs = &planA, + .rhs = &planB} ; + GrB_Index result_nnz ; + OK(LAGraph_RPQMatrix(&result_nnz, &lor, msg)) ; + + const char *expected = "(1, 2) (1, 3) (1, 7) (2, 4) (2, 5) (2, 7) (3, 6) (4, 4) (4, 6) " + "(5, 1) (5, 8) (6, 3) (7, 6)" ; + check_result_matrix(lor.res_mat, expected) ; + TEST_CHECK(result_nnz == 13) ; + + GrB_Matrix_free(&A) ; + GrB_Matrix_free(&B) ; + LAGraph_Finalize(msg) ; +} + +// a* +void test_RPQMatrix_KLEENE(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A ; + load_matrix(&A, "rpq_data/a.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan kleene = {.op = RPQ_MATRIX_OP_KLEENE, .rhs = &planA} ; + GrB_Index result_nnz ; + OK(LAGraph_RPQMatrix(&result_nnz, &kleene, msg)) ; + + const char *expected = + "(1, 1) (1, 2) (1, 4) (1, 6) (1, 7) (2, 2) (2, 4) (3, 3) " + "(3, 6) (4, 4) (5, 5) (5, 8) (6, 6) (7, 6) (7, 7) (8, 8)" ; + check_result_matrix(kleene.res_mat, expected) ; + TEST_CHECK(result_nnz == 16) ; + + GrB_Matrix_free(&A) ; + LAGraph_Finalize(msg) ; +} + +// (a)*/b +void test_RPQMatrix_KLEENE_L(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B ; + load_matrix(&A, "rpq_data/a.mtx") ; + load_matrix(&B, "rpq_data/b.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan kleene = { + .op = RPQ_MATRIX_OP_KLEENE, + .rhs = &planA} ; + RPQMatrixPlan concat = { + .op = RPQ_MATRIX_OP_CONCAT, + .lhs = &kleene, + .rhs = &planB} ; + + GrB_Index result_nnz ; + OK(LAGraph_RPQMatrix(&result_nnz, &concat, msg)) ; + const char *expected = "(1, 3) (1, 4) (1, 5) (1, 6) (1, 7) " + "(2, 4) (2, 5) (2, 6) (2, 7) (3, 3) " + "(4, 4) (4, 6) (5, 1) (6, 3) (7, 3)" ; + check_result_matrix(concat.res_mat, expected) ; + TEST_CHECK(result_nnz == 15) ; + GrB_Info expected_nnz = result_nnz ; + RPQMatrixPlan planA2 = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB2 = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan kleeneL = { + .op = RPQ_MATRIX_OP_KLEENE_L, + .lhs = &planA2, + .rhs = &planB2} ; + + OK(LAGraph_RPQMatrix(&result_nnz, &kleeneL, msg)) ; + + expected = matrix_to_str(concat.res_mat) ; + + check_result_matrix(kleeneL.res_mat, expected) ; + TEST_CHECK(result_nnz == expected_nnz) ; + + GrB_Matrix_free(&A) ; + GrB_Matrix_free(&B) ; + LAGraph_Finalize(msg) ; +} + +// b/(a)* +void test_RPQMatrix_KLEENE_R(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B ; + load_matrix(&A, "rpq_data/a.mtx") ; + load_matrix(&B, "rpq_data/b.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan kleene = { + .op = RPQ_MATRIX_OP_KLEENE, + .rhs = &planB} ; + RPQMatrixPlan concat = { + .op = RPQ_MATRIX_OP_CONCAT, + .lhs = &planA, + .rhs = &kleene} ; + + GrB_Index result_nnz; + OK(LAGraph_RPQMatrix(&result_nnz, &concat, msg)) ; + const char *expected = "(1, 1) (1, 2) (1, 3) (1, 5) (1, 7) " + "(2, 3) (2, 4) (2, 6) (3, 3) (3, 6) " + "(5, 8) (7, 3) (7, 6)" ; + check_result_matrix(concat.res_mat, expected) ; + TEST_CHECK(result_nnz == 13) ; + GrB_Info expected_nnz = result_nnz ; + + RPQMatrixPlan planA2 = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB2 = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan kleeneR = { + .op = RPQ_MATRIX_OP_KLEENE_R, + .lhs = &planA2, + .rhs = &planB2} ; + + OK(LAGraph_RPQMatrix(&result_nnz, &kleeneR, msg)) ; + + expected = matrix_to_str(concat.res_mat) ; + check_result_matrix(kleeneR.res_mat, expected) ; + TEST_CHECK(result_nnz == expected_nnz) ; + + GrB_Matrix_free(&A) ; + GrB_Matrix_free(&B) ; + LAGraph_Finalize(msg) ; +} + +// c/(a|b)* +void test_RPQMatrix_Complex(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B, C ; + load_matrix(&A, "rpq_data/a.mtx") ; + load_matrix(&B, "rpq_data/b.mtx") ; + load_matrix(&C, "rpq_data/c.mtx") ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan planC = {.op = RPQ_MATRIX_OP_LABEL, .mat = C} ; + RPQMatrixPlan lor = {.op = RPQ_MATRIX_OP_LOR, .lhs = &planA, .rhs = &planB} ; + RPQMatrixPlan kleene = { + .op = RPQ_MATRIX_OP_KLEENE, + .rhs = &lor} ; + + RPQMatrixPlan concat = {.op = RPQ_MATRIX_OP_CONCAT, .lhs = &planC, .rhs = &kleene} ; + GrB_Index result_nnz ; + OK(LAGraph_RPQMatrix(&result_nnz, &concat, msg)) ; + + const char *expected = "(1, 1) (1, 2) (1, 3) (1, 4) (1, 5) (1, 6) (1, 7) (1, 8) " + "(2, 3) (2, 6) (2, 8) " + "(3, 1) (3, 2) (3, 3) (3, 4) (3, 5) (3, 6) (3, 7) (3, 8) " + "(4, 1) (4, 2) (4, 3) (4, 4) (4, 5) (4, 6) (4, 7) (4, 8) " + "(5, 1) (5, 2) (5, 3) (5, 4) (5, 5) (5, 6) (5, 7) (5, 8) " + "(6, 3) (6, 4) (6, 6) " + "(7, 1) (7, 2) (7, 3) (7, 4) (7, 5) (7, 6) (7, 7) (7, 8) " + "(8, 3) (8, 6)" ; + check_result_matrix(concat.res_mat, expected) ; + TEST_CHECK(result_nnz == 48) ; + + GrB_Matrix_free(&A); + GrB_Matrix_free(&B); + GrB_Matrix_free(&C); + LAGraph_Finalize(msg) ; +} + +//========================== +// Tests with invalid result +//========================== + +void test_RPQMatrix_Incorrect_size(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix M ; + OK(GrB_Matrix_new(&M, GrB_BOOL, 3, 4)) ; + + RPQMatrixPlan plan = {.op = RPQ_MATRIX_OP_LABEL, .mat = M} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &plan, msg) ; + TEST_CHECK(info == GrB_INVALID_VALUE) ; + TEST_MSG("Expected GrB_INVALID_VALUE for non-square matrix") ; + + GrB_Matrix_free(&M) ; + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Unequal_size(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A, B ; + OK(GrB_Matrix_new(&A, GrB_BOOL, 3, 3)) ; + OK(GrB_Matrix_new(&B, GrB_BOOL, 4, 4)) ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + RPQMatrixPlan planB = {.op = RPQ_MATRIX_OP_LABEL, .mat = B} ; + RPQMatrixPlan concat = {.op = RPQ_MATRIX_OP_CONCAT, .lhs = &planA, .rhs = &planB} ; + + GrB_Index result_nnz ; + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &concat, msg) ; + + TEST_CHECK(info == GrB_INVALID_VALUE) ; + TEST_MSG("Expected GrB_INVALID_VALUE for unequal matrix sizes") ; + + GrB_Matrix_free(&A) ; + GrB_Matrix_free(&B) ; + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Null_Child_concat(void) +{ + LAGraph_Init(msg) ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL} ; + RPQMatrixPlan concat = {.op = RPQ_MATRIX_OP_CONCAT, .lhs = &planA, .rhs = NULL} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &concat, msg) ; + TEST_CHECK(info == GrB_NULL_POINTER || info == GrB_INVALID_VALUE) ; + TEST_MSG("retval = %d (%s)", info, msg) ; + + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Null_Child_lor(void) +{ + LAGraph_Init(msg) ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL} ; + RPQMatrixPlan lor = {.op = RPQ_MATRIX_OP_LOR, .lhs = &planA, .rhs = NULL} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &lor, msg) ; + TEST_CHECK(info == GrB_NULL_POINTER || info == GrB_INVALID_VALUE) ; + TEST_MSG("retval = %d (%s)", info, msg) ; + + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Null_Child_L_Kleene(void) +{ + LAGraph_Init(msg) ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL} ; + RPQMatrixPlan kleeneL = {.op = RPQ_MATRIX_OP_KLEENE_L, .lhs = &planA, .rhs = NULL} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &kleeneL, msg) ; + TEST_CHECK(info == GrB_NULL_POINTER || info == GrB_INVALID_VALUE) ; + TEST_MSG("retval = %d (%s)", info, msg) ; + + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Null_Child_R_Kleene(void) +{ + LAGraph_Init(msg) ; + + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL} ; + RPQMatrixPlan kleeneR = {.op = RPQ_MATRIX_OP_KLEENE_R, .lhs = &planA, .rhs = NULL} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &kleeneR, msg) ; + TEST_CHECK(info == GrB_NULL_POINTER || info == GrB_INVALID_VALUE) ; + TEST_MSG("retval = %d (%s)", info, msg) ; + + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Non_Null_Children_Kleene(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A ; + OK(GrB_Matrix_new(&A, GrB_BOOL, 3, 3)) ; + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + + RPQMatrixPlan kleene = {.op = RPQ_MATRIX_OP_KLEENE, .lhs = &planA, .rhs = &planA} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &kleene, msg) ; + TEST_CHECK(info == GrB_INVALID_VALUE) ; + TEST_MSG("retval = %d (%s)", info, msg) ; + + GrB_Matrix_free(&A) ; + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Left_Child_Kleene(void) +{ + LAGraph_Init(msg) ; + + GrB_Matrix A ; + OK(GrB_Matrix_new(&A, GrB_BOOL, 3, 3)) ; + RPQMatrixPlan planA = {.op = RPQ_MATRIX_OP_LABEL, .mat = A} ; + + RPQMatrixPlan kleene = {.op = RPQ_MATRIX_OP_KLEENE, .lhs = &planA, .rhs = NULL} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &kleene, msg) ; + TEST_CHECK(info == GrB_INVALID_VALUE) ; + TEST_MSG("Expected GrB_NULL_POINTER for KLEENE with only left child") ; + + GrB_Matrix_free(&A) ; + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Invalid_Op_type(void) +{ + LAGraph_Init(msg) ; + + RPQMatrixPlan plan = {.op = (RPQMatrixOp)52} ; + GrB_Index result_nnz ; + + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, &plan, msg) ; + TEST_CHECK(info == GrB_INVALID_VALUE) ; + + LAGraph_Finalize(msg) ; +} + +void test_RPQMatrix_Null_root(void) +{ + LAGraph_Init(msg) ; + + GrB_Index result_nnz ; + GrB_Info info = LAGraph_RPQMatrix(&result_nnz, NULL, msg) ; + + TEST_CHECK(info == GrB_NULL_POINTER) ; + TEST_MSG("Expected GrB_NULL_POINTER for NULL plan root") ; + + LAGraph_Finalize(msg) ; +} + + +TEST_LIST = { + {"RPQMatrix_CONCAT", test_RPQMatrix_CONCAT}, + {"RPQMatrix_LOR", test_RPQMatrix_LOR}, + {"RPQMatrix_KLEENE", test_RPQMatrix_KLEENE}, + {"RPQMatrix_KLEENE_L", test_RPQMatrix_KLEENE_L}, + {"RPQMatrix_KLEENE_R", test_RPQMatrix_KLEENE_R}, + {"RPQMatrix_Complex", test_RPQMatrix_Complex}, + {"RPQMatrix_Incorrect_size", test_RPQMatrix_Incorrect_size}, + {"RPQMatrix_Unequal_size", test_RPQMatrix_Unequal_size}, + {"RPQMatrix_Null_Child_concat", test_RPQMatrix_Null_Child_concat}, + {"RPQMatrix_Null_Child_lor", test_RPQMatrix_Null_Child_lor}, + {"RPQMatrix_Null_Child_L_Kleene", test_RPQMatrix_Null_Child_L_Kleene}, + {"RPQMatrix_Null_Child_R_Kleene", test_RPQMatrix_Null_Child_R_Kleene}, + {"RPQMatrix_Non_Null_Children_Kleene", test_RPQMatrix_Non_Null_Children_Kleene}, + {"RPQMatrix_Left_Child_Kleene", test_RPQMatrix_Left_Child_Kleene}, + {"RPQMatrix_Invalid_Op_type", test_RPQMatrix_Invalid_Op_type}, + {"RPQMatrix_Null_root", test_RPQMatrix_Null_root}, + {NULL, NULL}} ; diff --git a/include/LAGraphX.h b/include/LAGraphX.h index 7fa3655f2d..132f875b11 100644 --- a/include/LAGraphX.h +++ b/include/LAGraphX.h @@ -845,6 +845,123 @@ int LAGraph_RegularPathQuery // nodes reachable from the starting by the size_t ns, // number of source vertices char *msg // LAGraph output message ); +//**************************************************************************** + +// LAGraph_RPQMatrix: regular path query algortithm +// +// For an edge-labelled directed graph the algorithm computes the nubmer of +// nonzero elements in its reachability matrix. +// The reachability matrix created by following rules: +// * A[i,j] = True if node with index j is reachable from node with index i +// and concatenation of labels over path between these two labels is a word +// from specified regular language. +// * A[i,j] = False in other cases. +// +// The algorithm is based on the idea of ​​considering a regular constraint as +// an abstract syntax tree, the leaves of which are matrices of adjacency matrix +// decomposition of the graph, and the internal nodes are the operations of +// conjunction, concatenation, etc. +// +// Example of adjacency matrix decomposition: +// +// Graph: +// (0) --[a]-> (1) +// | ^ +// [b] [c]--/ +// | --/ +// v / +// (2) --[b]-> (3) +// +// Adjacency matrix decomposition of this graph consists of: +// * Adjacency matrix for the label a: +// 0 1 2 3 +// 0 | | T | | | +// 1 | | | | | +// 2 | | | | | +// 3 | | | | | +// * Adjacency matrix for the label b: +// 0 1 2 3 +// 0 | | | T | | +// 1 | | | | | +// 2 | | | | T | +// 3 | | | | | +// * Adjacency matrix for the label c: +// 0 1 2 3 +// 0 | | | | | +// 1 | | | | | +// 2 | | T | | | +// 3 | | | | | +// +// The algorithm recursively starts from the root of the given tree and +// performs the operations corresponding to each node on the children of that +// node. As a result of the algorithm's execution, the reachability +// matrix will be stored at the root. +// +// Example of regular expression and its corresponding AST: +// +// Regular expression: +// a/(b|c)* +// +// Abstract syntax tree: +// ┌─┐ +// │/| (3) +// └┬┘ +// ┌─┬─┴─┬─┐ +// │a│ │*│ (2) +// └─┘ └┬┘ +// ┌┴┐ +// │|│ (1) +// └┬┘ +// ┌─┬─┴─┬─┐ +// │b│ │c│ +// └─┘ └─┘ +// The numbers next to the graph nodes show the order in which operations are +// executed. For the decomposition and AST specified above, the resulting +// matrix will have the following structure: +// +// 0 1 2 3 +// 0 | | T | | | +// 1 | | | | | +// 2 | | | | | +// 3 | | | | | +// +// So for this example LAGraph_RPQMatrix will return 1. +typedef enum RPQMatrixOp +{ + RPQ_MATRIX_OP_LABEL, // edge label of input graph + RPQ_MATRIX_OP_LOR, // alternation + RPQ_MATRIX_OP_CONCAT, // concatenation + RPQ_MATRIX_OP_KLEENE, // reflexive-transitive closure + RPQ_MATRIX_OP_KLEENE_L, // optimization for (A)*/B case, + // when B is sparse and A is dense + RPQ_MATRIX_OP_KLEENE_R, // optimization for A/(B)* case, + // when A is sparse and B is dense +} RPQMatrixOp ; + +typedef struct RPQMatrixPlan +{ + RPQMatrixOp op ; // type of tree node + struct RPQMatrixPlan *lhs ; // left subtree + struct RPQMatrixPlan *rhs ; // right subtree + GrB_Matrix mat ; // adjacency matrix of label. + // Only for leafes. Should be NULL + // in other cases + GrB_Matrix res_mat ; // result of subtree execution. + // Should be NULL +} RPQMatrixPlan ; + +LAGRAPHX_PUBLIC +GrB_Info LAGraph_RPQMatrix( + // output: + GrB_Index *nnz, // number of nonzero values in + // result reachability matrix + + // input: + RPQMatrixPlan *plan, // root of abstarct syntax tree of + // regular expression + char *msg // LAGraph output message +) ; + //**************************************************************************** LAGRAPHX_PUBLIC int LAGraph_VertexCentrality_Triangle // vertex triangle-centrality