|
|
|
|
@ -197,8 +197,8 @@ void xdl_free_env(xdfenv_t *xe) {
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
|
|
|
|
|
long r, rdis0, rpdis0, rdis1, rpdis1;
|
|
|
|
|
static bool xdl_clean_mmatch(uint8_t const *action, ptrdiff_t i, ptrdiff_t s, ptrdiff_t e) {
|
|
|
|
|
ptrdiff_t r, rdis0, rpdis0, rdis1, rpdis1;
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
* Limits the window that is examined during the similar-lines
|
|
|
|
|
@ -268,8 +268,7 @@ static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
|
|
|
|
|
* might be potentially discarded if they appear in a run of discardable.
|
|
|
|
|
*/
|
|
|
|
|
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
|
|
|
|
|
long i, nm, mlim;
|
|
|
|
|
xrecord_t *recs;
|
|
|
|
|
ptrdiff_t i, nm, mlim1, mlim2;
|
|
|
|
|
xdlclass_t *rcrec;
|
|
|
|
|
uint8_t *action1 = NULL, *action2 = NULL;
|
|
|
|
|
bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
|
|
|
|
|
@ -291,20 +290,44 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
|
|
|
|
|
/*
|
|
|
|
|
* Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
|
|
|
|
|
*/
|
|
|
|
|
if ((mlim = xdl_bogosqrt((long)xdf1->nrec)) > XDL_MAX_EQLIMIT)
|
|
|
|
|
mlim = XDL_MAX_EQLIMIT;
|
|
|
|
|
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
|
|
|
|
|
rcrec = cf->rcrecs[recs->minimal_perfect_hash];
|
|
|
|
|
if (need_min) {
|
|
|
|
|
/* i.e. infinity */
|
|
|
|
|
mlim1 = PTRDIFF_MAX;
|
|
|
|
|
} else {
|
|
|
|
|
mlim1 = xdl_bogosqrt((uint64_t)xdf1->nrec);
|
|
|
|
|
if (mlim1 > XDL_MAX_EQLIMIT)
|
|
|
|
|
mlim1 = XDL_MAX_EQLIMIT;
|
|
|
|
|
}
|
|
|
|
|
for (i = xdf1->dstart; i <= xdf1->dend; i++) {
|
|
|
|
|
size_t mph1 = xdf1->recs[i].minimal_perfect_hash;
|
|
|
|
|
rcrec = cf->rcrecs[mph1];
|
|
|
|
|
nm = rcrec ? rcrec->len2 : 0;
|
|
|
|
|
action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
|
|
|
|
|
if (nm == 0)
|
|
|
|
|
action1[i] = DISCARD;
|
|
|
|
|
else if (nm < mlim1)
|
|
|
|
|
action1[i] = KEEP;
|
|
|
|
|
else /* nm >= mlim1 */
|
|
|
|
|
action1[i] = INVESTIGATE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if ((mlim = xdl_bogosqrt((long)xdf2->nrec)) > XDL_MAX_EQLIMIT)
|
|
|
|
|
mlim = XDL_MAX_EQLIMIT;
|
|
|
|
|
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
|
|
|
|
|
rcrec = cf->rcrecs[recs->minimal_perfect_hash];
|
|
|
|
|
if (need_min) {
|
|
|
|
|
/* i.e. infinity */
|
|
|
|
|
mlim2 = PTRDIFF_MAX;
|
|
|
|
|
} else {
|
|
|
|
|
mlim2 = xdl_bogosqrt((uint64_t)xdf2->nrec);
|
|
|
|
|
if (mlim2 > XDL_MAX_EQLIMIT)
|
|
|
|
|
mlim2 = XDL_MAX_EQLIMIT;
|
|
|
|
|
}
|
|
|
|
|
for (i = xdf2->dstart; i <= xdf2->dend; i++) {
|
|
|
|
|
size_t mph2 = xdf2->recs[i].minimal_perfect_hash;
|
|
|
|
|
rcrec = cf->rcrecs[mph2];
|
|
|
|
|
nm = rcrec ? rcrec->len1 : 0;
|
|
|
|
|
action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
|
|
|
|
|
if (nm == 0)
|
|
|
|
|
action2[i] = DISCARD;
|
|
|
|
|
else if (nm < mlim2)
|
|
|
|
|
action2[i] = KEEP;
|
|
|
|
|
else /* nm >= mlim2 */
|
|
|
|
|
action2[i] = INVESTIGATE;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
@ -312,27 +335,45 @@ static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xd
|
|
|
|
|
* false, or become true.
|
|
|
|
|
*/
|
|
|
|
|
xdf1->nreff = 0;
|
|
|
|
|
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
|
|
|
|
|
i <= xdf1->dend; i++, recs++) {
|
|
|
|
|
if (action1[i] == KEEP ||
|
|
|
|
|
(action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
|
|
|
|
|
for (i = xdf1->dstart; i <= xdf1->dend; i++) {
|
|
|
|
|
uint8_t action = action1[i];
|
|
|
|
|
|
|
|
|
|
if (action == INVESTIGATE) {
|
|
|
|
|
if (!xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))
|
|
|
|
|
action = KEEP;
|
|
|
|
|
else
|
|
|
|
|
action = DISCARD;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (action == KEEP) {
|
|
|
|
|
xdf1->reference_index[xdf1->nreff++] = i;
|
|
|
|
|
/* changed[i] remains false, i.e. keep */
|
|
|
|
|
} else
|
|
|
|
|
/* changed[i] remains false */
|
|
|
|
|
} else if (action == DISCARD) {
|
|
|
|
|
xdf1->changed[i] = true;
|
|
|
|
|
/* i.e. discard */
|
|
|
|
|
} else {
|
|
|
|
|
BUG("Illegal state for action");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
xdf2->nreff = 0;
|
|
|
|
|
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
|
|
|
|
|
i <= xdf2->dend; i++, recs++) {
|
|
|
|
|
if (action2[i] == KEEP ||
|
|
|
|
|
(action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
|
|
|
|
|
for (i = xdf2->dstart; i <= xdf2->dend; i++) {
|
|
|
|
|
uint8_t action = action2[i];
|
|
|
|
|
|
|
|
|
|
if (action == INVESTIGATE) {
|
|
|
|
|
if (!xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))
|
|
|
|
|
action = KEEP;
|
|
|
|
|
else
|
|
|
|
|
action = DISCARD;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (action == KEEP) {
|
|
|
|
|
xdf2->reference_index[xdf2->nreff++] = i;
|
|
|
|
|
/* changed[i] remains false, i.e. keep */
|
|
|
|
|
} else
|
|
|
|
|
/* changed[i] remains false */
|
|
|
|
|
} else if (action == DISCARD) {
|
|
|
|
|
xdf2->changed[i] = true;
|
|
|
|
|
/* i.e. discard */
|
|
|
|
|
} else {
|
|
|
|
|
BUG("Illegal state for action");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
cleanup:
|
|
|
|
|
|