Merge branch 'rc/diff-cleanup-records'
* rc/diff-cleanup-records:
  xdiff/xprepare: improve O(n*m) performance in xdl_cleanup_records()

maint
						commit
						497dff9138
					
				|  | @ -36,6 +36,7 @@ typedef struct s_xdlclass { | ||||||
| 	char const *line; | 	char const *line; | ||||||
| 	long size; | 	long size; | ||||||
| 	long idx; | 	long idx; | ||||||
|  | 	long len1, len2; | ||||||
| } xdlclass_t; | } xdlclass_t; | ||||||
|  |  | ||||||
| typedef struct s_xdlclassifier { | typedef struct s_xdlclassifier { | ||||||
|  | @ -43,6 +44,8 @@ typedef struct s_xdlclassifier { | ||||||
| 	long hsize; | 	long hsize; | ||||||
| 	xdlclass_t **rchash; | 	xdlclass_t **rchash; | ||||||
| 	chastore_t ncha; | 	chastore_t ncha; | ||||||
|  | 	xdlclass_t **rcrecs; | ||||||
|  | 	long alloc; | ||||||
| 	long count; | 	long count; | ||||||
| 	long flags; | 	long flags; | ||||||
| } xdlclassifier_t; | } xdlclassifier_t; | ||||||
|  | @ -52,15 +55,15 @@ typedef struct s_xdlclassifier { | ||||||
|  |  | ||||||
| static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); | static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags); | ||||||
| static void xdl_free_classifier(xdlclassifier_t *cf); | static void xdl_free_classifier(xdlclassifier_t *cf); | ||||||
| static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | ||||||
| 			       xrecord_t *rec); | 			       unsigned int hbits, xrecord_t *rec); | ||||||
| static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, | ||||||
| 			   xdlclassifier_t *cf, xdfile_t *xdf); | 			   xdlclassifier_t *cf, xdfile_t *xdf); | ||||||
| static void xdl_free_ctx(xdfile_t *xdf); | static void xdl_free_ctx(xdfile_t *xdf); | ||||||
| static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | static int xdl_clean_mmatch(char const *dis, long i, long s, long e); | ||||||
| static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2); | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | ||||||
| static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); | static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2); | ||||||
| static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2); | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2); | ||||||
|  |  | ||||||
|  |  | ||||||
|  |  | ||||||
|  | @ -82,6 +85,14 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { | ||||||
| 	} | 	} | ||||||
| 	memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); | 	memset(cf->rchash, 0, cf->hsize * sizeof(xdlclass_t *)); | ||||||
|  |  | ||||||
|  | 	cf->alloc = size; | ||||||
|  | 	if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) { | ||||||
|  |  | ||||||
|  | 		xdl_free(cf->rchash); | ||||||
|  | 		xdl_cha_free(&cf->ncha); | ||||||
|  | 		return -1; | ||||||
|  | 	} | ||||||
|  |  | ||||||
| 	cf->count = 0; | 	cf->count = 0; | ||||||
|  |  | ||||||
| 	return 0; | 	return 0; | ||||||
|  | @ -90,16 +101,18 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) { | ||||||
|  |  | ||||||
| static void xdl_free_classifier(xdlclassifier_t *cf) { | static void xdl_free_classifier(xdlclassifier_t *cf) { | ||||||
|  |  | ||||||
|  | 	xdl_free(cf->rcrecs); | ||||||
| 	xdl_free(cf->rchash); | 	xdl_free(cf->rchash); | ||||||
| 	xdl_cha_free(&cf->ncha); | 	xdl_cha_free(&cf->ncha); | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits, | static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash, | ||||||
| 			       xrecord_t *rec) { | 			       unsigned int hbits, xrecord_t *rec) { | ||||||
| 	long hi; | 	long hi; | ||||||
| 	char const *line; | 	char const *line; | ||||||
| 	xdlclass_t *rcrec; | 	xdlclass_t *rcrec; | ||||||
|  | 	xdlclass_t **rcrecs; | ||||||
|  |  | ||||||
| 	line = rec->ptr; | 	line = rec->ptr; | ||||||
| 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | 	hi = (long) XDL_HASHLONG(rec->ha, cf->hbits); | ||||||
|  | @ -115,13 +128,25 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | ||||||
| 			return -1; | 			return -1; | ||||||
| 		} | 		} | ||||||
| 		rcrec->idx = cf->count++; | 		rcrec->idx = cf->count++; | ||||||
|  | 		if (cf->count > cf->alloc) { | ||||||
|  | 			cf->alloc *= 2; | ||||||
|  | 			if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) { | ||||||
|  |  | ||||||
|  | 				return -1; | ||||||
|  | 			} | ||||||
|  | 			cf->rcrecs = rcrecs; | ||||||
|  | 		} | ||||||
|  | 		cf->rcrecs[rcrec->idx] = rcrec; | ||||||
| 		rcrec->line = line; | 		rcrec->line = line; | ||||||
| 		rcrec->size = rec->size; | 		rcrec->size = rec->size; | ||||||
| 		rcrec->ha = rec->ha; | 		rcrec->ha = rec->ha; | ||||||
|  | 		rcrec->len1 = rcrec->len2 = 0; | ||||||
| 		rcrec->next = cf->rchash[hi]; | 		rcrec->next = cf->rchash[hi]; | ||||||
| 		cf->rchash[hi] = rcrec; | 		cf->rchash[hi] = rcrec; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	(pass == 1) ? rcrec->len1++ : rcrec->len2++; | ||||||
|  |  | ||||||
| 	rec->ha = (unsigned long) rcrec->idx; | 	rec->ha = (unsigned long) rcrec->idx; | ||||||
|  |  | ||||||
| 	hi = (long) XDL_HASHLONG(rec->ha, hbits); | 	hi = (long) XDL_HASHLONG(rec->ha, hbits); | ||||||
|  | @ -132,7 +157,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp, | ||||||
| 			   xdlclassifier_t *cf, xdfile_t *xdf) { | 			   xdlclassifier_t *cf, xdfile_t *xdf) { | ||||||
| 	unsigned int hbits; | 	unsigned int hbits; | ||||||
| 	long nrec, hsize, bsize; | 	long nrec, hsize, bsize; | ||||||
|  | @ -185,7 +210,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp, | ||||||
| 			recs[nrec++] = crec; | 			recs[nrec++] = crec; | ||||||
|  |  | ||||||
| 			if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && | 			if (!(xpp->flags & XDF_HISTOGRAM_DIFF) && | ||||||
| 				xdl_classify_record(cf, rhash, hbits, crec) < 0) | 				xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) | ||||||
| 				goto abort; | 				goto abort; | ||||||
| 		} | 		} | ||||||
| 	} | 	} | ||||||
|  | @ -257,30 +282,30 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp, | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | 	if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) { | ||||||
|  |  | ||||||
| 		xdl_free_classifier(&cf); | 		xdl_free_classifier(&cf); | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
| 	if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | 	if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) { | ||||||
|  |  | ||||||
| 		xdl_free_ctx(&xe->xdf1); | 		xdl_free_ctx(&xe->xdf1); | ||||||
| 		xdl_free_classifier(&cf); | 		xdl_free_classifier(&cf); | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if (!(xpp->flags & XDF_HISTOGRAM_DIFF)) |  | ||||||
| 		xdl_free_classifier(&cf); |  | ||||||
|  |  | ||||||
| 	if (!(xpp->flags & XDF_PATIENCE_DIFF) && | 	if (!(xpp->flags & XDF_PATIENCE_DIFF) && | ||||||
| 			!(xpp->flags & XDF_HISTOGRAM_DIFF) && | 			!(xpp->flags & XDF_HISTOGRAM_DIFF) && | ||||||
| 			xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) { | 			xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) { | ||||||
|  |  | ||||||
| 		xdl_free_ctx(&xe->xdf2); | 		xdl_free_ctx(&xe->xdf2); | ||||||
| 		xdl_free_ctx(&xe->xdf1); | 		xdl_free_ctx(&xe->xdf1); | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
|  | 	if (!(xpp->flags & XDF_HISTOGRAM_DIFF)) | ||||||
|  | 		xdl_free_classifier(&cf); | ||||||
|  |  | ||||||
| 	return 0; | 	return 0; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | @ -355,11 +380,10 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) { | ||||||
|  * matches on the other file. Also, lines that have multiple matches |  * matches on the other file. Also, lines that have multiple matches | ||||||
|  * might be potentially discarded if they appear in a run of discardable. |  * might be potentially discarded if they appear in a run of discardable. | ||||||
|  */ |  */ | ||||||
| static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) { | static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { | ||||||
| 	long i, nm, rhi, nreff, mlim; | 	long i, nm, nreff; | ||||||
| 	unsigned long hav; |  | ||||||
| 	xrecord_t **recs; | 	xrecord_t **recs; | ||||||
| 	xrecord_t *rec; | 	xdlclass_t *rcrec; | ||||||
| 	char *dis, *dis1, *dis2; | 	char *dis, *dis1, *dis2; | ||||||
|  |  | ||||||
| 	if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { | 	if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) { | ||||||
|  | @ -370,26 +394,16 @@ static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) { | ||||||
| 	dis1 = dis; | 	dis1 = dis; | ||||||
| 	dis2 = dis1 + xdf1->nrec + 1; | 	dis2 = dis1 + xdf1->nrec + 1; | ||||||
|  |  | ||||||
| 	if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT) |  | ||||||
| 		mlim = XDL_MAX_EQLIMIT; |  | ||||||
| 	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { | 	for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) { | ||||||
| 		hav = (*recs)->ha; | 		rcrec = cf->rcrecs[(*recs)->ha]; | ||||||
| 		rhi = (long) XDL_HASHLONG(hav, xdf2->hbits); | 		nm = rcrec ? rcrec->len2 : 0; | ||||||
| 		for (nm = 0, rec = xdf2->rhash[rhi]; rec; rec = rec->next) | 		dis1[i] = (nm == 0) ? 0: 1; | ||||||
| 			if (rec->ha == hav && ++nm == mlim) |  | ||||||
| 				break; |  | ||||||
| 		dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT) |  | ||||||
| 		mlim = XDL_MAX_EQLIMIT; |  | ||||||
| 	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { | 	for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) { | ||||||
| 		hav = (*recs)->ha; | 		rcrec = cf->rcrecs[(*recs)->ha]; | ||||||
| 		rhi = (long) XDL_HASHLONG(hav, xdf1->hbits); | 		nm = rcrec ? rcrec->len1 : 0; | ||||||
| 		for (nm = 0, rec = xdf1->rhash[rhi]; rec; rec = rec->next) | 		dis2[i] = (nm == 0) ? 0: 1; | ||||||
| 			if (rec->ha == hav && ++nm == mlim) |  | ||||||
| 				break; |  | ||||||
| 		dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1; |  | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
| 	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; | 	for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; | ||||||
|  | @ -451,10 +465,10 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) { | ||||||
| } | } | ||||||
|  |  | ||||||
|  |  | ||||||
| static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) { | static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) { | ||||||
|  |  | ||||||
| 	if (xdl_trim_ends(xdf1, xdf2) < 0 || | 	if (xdl_trim_ends(xdf1, xdf2) < 0 || | ||||||
| 	    xdl_cleanup_records(xdf1, xdf2) < 0) { | 	    xdl_cleanup_records(cf, xdf1, xdf2) < 0) { | ||||||
|  |  | ||||||
| 		return -1; | 		return -1; | ||||||
| 	} | 	} | ||||||
|  |  | ||||||
		Loading…
	
		Reference in New Issue
	
	 Junio C Hamano
						Junio C Hamano