[Groonga-commit] pgroonga/pgroonga at 106fe04 [master] pgroonga_score: improve performance for one column primary key case

Back to archive index

Kouhei Sutou null+****@clear*****
Sat Dec 24 00:10:36 JST 2016


Kouhei Sutou	2016-12-24 00:10:36 +0900 (Sat, 24 Dec 2016)

  New Revision: 106fe049f5886f818345f23f4264dd1519134834
  https://github.com/pgroonga/pgroonga/commit/106fe049f5886f818345f23f4264dd1519134834

  Message:
    pgroonga_score: improve performance for one column primary key case
    
    GitHub: fix #31
    
    100K rows case: 9sec -> 300msec
    
    Reported by yongxianggao-chanjet. Thanks!!!

  Modified files:
    src/pgroonga.c

  Modified: src/pgroonga.c (+139 -36)
===================================================================
--- src/pgroonga.c    2016-12-23 22:27:04 +0900 (db9da15)
+++ src/pgroonga.c    2016-12-24 00:10:36 +0900 (a804bb2)
@@ -906,24 +906,129 @@ PGrnSetSources(Relation index, grn_obj *sourcesTable)
 
 #ifdef PGRN_SUPPORT_SCORE
 static double
-PGrnCollectScoreScanOpaque(Relation table, HeapTuple tuple, PGrnScanOpaque so)
+PGrnCollectScoreScanOpaqueGetScore(Relation table,
+								   PGrnScanOpaque so,
+								   grn_id recordID)
+{
+	double score = 0.0;
+	grn_id id;
+	ItemPointerData ctid;
+
+	id = grn_table_get(ctx, so->searched, &recordID, sizeof(grn_id));
+	if (id == GRN_ID_NIL)
+		return 0.0;
+
+	GRN_BULK_REWIND(&(buffers->ctid));
+	grn_obj_get_value(ctx, so->ctidAccessor, id, &(buffers->ctid));
+	ctid = PGrnCtidUnpack(GRN_UINT64_VALUE(&(buffers->ctid)));
+
+	if (!PGrnCtidIsAlive(table, &ctid))
+		return 0.0;
+
+	GRN_BULK_REWIND(&(buffers->score));
+	grn_obj_get_value(ctx, so->scoreAccessor, id, &(buffers->score));
+	if (buffers->score.header.domain == GRN_DB_FLOAT)
+	{
+		score = GRN_FLOAT_VALUE(&(buffers->score));
+	}
+	else
+	{
+		score = GRN_INT32_VALUE(&(buffers->score));
+	}
+
+	return score;
+}
+
+static double
+PGrnCollectScoreScanOpaqueOneColumnPrimaryKey(Relation table,
+											  HeapTuple tuple,
+											  PGrnScanOpaque so)
 {
 	double score = 0.0;
 	TupleDesc desc;
-	grn_obj *expression;
-	grn_obj *variable;
-	slist_iter iter;
-	unsigned int nPrimaryKeyColumns = 0;
+	PGrnPrimaryKeyColumn *primaryKeyColumn;
+	grn_index_datum indexDatum;
+	grn_obj *lexicon;
+	grn_id termID;
+	grn_ii_cursor *iiCursor;
+	int iiNElements = 2;
+	grn_posting *posting;
 
-	if (so->dataTableID != tuple->t_tableOid)
+	desc = RelationGetDescr(table);
+	primaryKeyColumn = slist_container(PGrnPrimaryKeyColumn,
+									   node,
+									   so->primaryKeyColumns.head.next);
+
+	{
+		unsigned int nIndexData;
+
+		nIndexData = grn_column_find_index_data(ctx,
+												primaryKeyColumn->column,
+												GRN_OP_EQUAL,
+												&indexDatum,
+												1);
+		if (nIndexData == 0)
+			return 0.0;
+	}
+
+	lexicon = grn_ctx_at(ctx, indexDatum.index->header.domain);
+	if (!lexicon)
 		return 0.0;
 
-	if (!so->scoreAccessor)
+	{
+		bool isNULL;
+		Datum primaryKeyValue;
+
+		grn_obj_reinit(ctx,
+					   &(buffers->general),
+					   primaryKeyColumn->domain,
+					   primaryKeyColumn->flags);
+		primaryKeyValue = heap_getattr(tuple,
+									   primaryKeyColumn->number,
+									   desc,
+									   &isNULL);
+		PGrnConvertFromData(primaryKeyValue,
+							primaryKeyColumn->type,
+							&(buffers->general));
+	}
+	termID = grn_table_get(ctx,
+						   lexicon,
+						   GRN_BULK_HEAD(&(buffers->general)),
+						   GRN_BULK_VSIZE(&(buffers->general)));
+	if (termID == GRN_ID_NIL)
 		return 0.0;
 
-	if (slist_is_empty(&(so->primaryKeyColumns)))
+	iiCursor = grn_ii_cursor_open(ctx,
+								  (grn_ii *)(indexDatum.index),
+								  termID,
+								  GRN_ID_NIL,
+								  GRN_ID_NIL,
+								  iiNElements,
+								  0);
+	if (!iiCursor)
 		return 0.0;
 
+	while ((posting = grn_ii_cursor_next(ctx, iiCursor)))
+	{
+		score += PGrnCollectScoreScanOpaqueGetScore(table, so, posting->rid);
+	}
+	grn_ii_cursor_close(ctx, iiCursor);
+
+	return score;
+}
+
+static double
+PGrnCollectScoreScanOpaqueMultiColumnPrimaryKey(Relation table,
+												HeapTuple tuple,
+												PGrnScanOpaque so)
+{
+	double score = 0.0;
+	TupleDesc desc;
+	grn_obj *expression;
+	grn_obj *variable;
+	slist_iter iter;
+	unsigned int nPrimaryKeyColumns = 0;
+
 	desc = RelationGetDescr(table);
 
 	if (!so->scoreTargetRecords)
@@ -984,38 +1089,14 @@ PGrnCollectScoreScanOpaque(Relation table, HeapTuple tuple, PGrnScanOpaque so)
 											0, -1, GRN_CURSOR_ASCENDING);
 		while (grn_table_cursor_next(ctx, tableCursor) != GRN_ID_NIL)
 		{
+			void *key;
 			grn_id recordID;
-			grn_id id;
-			ItemPointerData ctid;
 
-			{
-				void *key;
-				grn_table_cursor_get_key(ctx, tableCursor, &key);
-				recordID = *((grn_id *) key);
-			}
+			grn_table_cursor_get_key(ctx, tableCursor, &key);
+			recordID = *((grn_id *) key);
 			grn_table_cursor_delete(ctx, tableCursor);
 
-			id = grn_table_get(ctx, so->searched, &recordID, sizeof(grn_id));
-			if (id == GRN_ID_NIL)
-				continue;
-
-			GRN_BULK_REWIND(&(buffers->ctid));
-			grn_obj_get_value(ctx, so->ctidAccessor, id, &(buffers->ctid));
-			ctid = PGrnCtidUnpack(GRN_UINT64_VALUE(&(buffers->ctid)));
-
-			if (!PGrnCtidIsAlive(table, &ctid))
-				continue;
-
-			GRN_BULK_REWIND(&(buffers->score));
-			grn_obj_get_value(ctx, so->scoreAccessor, id, &(buffers->score));
-			if (buffers->score.header.domain == GRN_DB_FLOAT)
-			{
-				score += GRN_FLOAT_VALUE(&(buffers->score));
-			}
-			else
-			{
-				score += GRN_INT32_VALUE(&(buffers->score));
-			}
+			score += PGrnCollectScoreScanOpaqueGetScore(table, so, recordID);
 		}
 		grn_obj_unlink(ctx, tableCursor);
 	}
@@ -1024,6 +1105,28 @@ PGrnCollectScoreScanOpaque(Relation table, HeapTuple tuple, PGrnScanOpaque so)
 }
 
 static double
+PGrnCollectScoreScanOpaque(Relation table, HeapTuple tuple, PGrnScanOpaque so)
+{
+	if (so->dataTableID != tuple->t_tableOid)
+		return 0.0;
+
+	if (!so->scoreAccessor)
+		return 0.0;
+
+	if (slist_is_empty(&(so->primaryKeyColumns)))
+		return 0.0;
+
+	if (so->primaryKeyColumns.head.next->next)
+	{
+		return PGrnCollectScoreScanOpaqueMultiColumnPrimaryKey(table, tuple, so);
+	}
+	else
+	{
+		return PGrnCollectScoreScanOpaqueOneColumnPrimaryKey(table, tuple, so);
+	}
+}
+
+static double
 PGrnCollectScore(Relation table, HeapTuple tuple)
 {
 	double score = 0.0;
-------------- next part --------------
HTMLの添付ファイルを保管しました...
Download 



More information about the Groonga-commit mailing list
Back to archive index