Kouhei Sutou
null+****@clear*****
Tue Aug 7 14:42:25 JST 2018
Kouhei Sutou 2018-08-07 14:42:25 +0900 (Tue, 07 Aug 2018) New Revision: 8aefa2556d76673ce2787ac330389276a495f9fd https://github.com/groonga/groonga/commit/8aefa2556d76673ce2787ac330389276a495f9fd Message: highlighter: add logs Modified files: lib/highlighter.c Modified: lib/highlighter.c (+108 -4) =================================================================== --- lib/highlighter.c 2018-08-07 14:05:02 +0900 (a62ba52a2) +++ lib/highlighter.c 2018-08-07 14:42:25 +0900 (8d9da019c) @@ -1,6 +1,7 @@ /* -*- c-basic-offset: 2 -*- */ /* Copyright(C) 2018 Brazil + Copyright(C) 2018 Kouhei Sutou <kou at clear-code.com> This library is free software; you can redistribute it and/or modify it under the terms of the GNU Lesser General Public @@ -459,8 +460,35 @@ grn_ids_is_included(grn_id *ids, size_t n_ids, grn_id id) return GRN_FALSE; } +static void +grn_highlighter_log_location(grn_ctx *ctx, + grn_log_level level, + const char *tag, + grn_highlighter_location *location, + const char *text, + size_t text_length) +{ + if (!grn_logger_pass(ctx, level)) { + return; + } + + GRN_LOG(ctx, + level, + "%s[location] " + "[%" GRN_FMT_INT64U "...%" GRN_FMT_INT64U "](%u)[%s] <%.*s>", + tag, + location->offset, + location->offset + location->length, + location->first_character_length, + location->have_overlap ? 
"overlapped" : "not-overlapped", + (int)location->length, + text + location->offset); +} + static uint64_t grn_highlighter_highlight_lexicon_flush(grn_ctx *ctx, + grn_log_level log_level, + const char *tag, grn_highlighter *highlighter, const char *text, size_t text_length, @@ -468,6 +496,7 @@ grn_highlighter_highlight_lexicon_flush(grn_ctx *ctx, grn_highlighter_location *location, uint64_t offset) { + grn_highlighter_log_location(ctx, log_level, tag, location, text, text_length); if (location->offset > offset) { grn_text_escape_xml(ctx, output, @@ -496,6 +525,8 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, size_t text_length, grn_obj *output) { + const char *tag = "[highlighter][highlight][lexicon]"; + grn_log_level log_level = GRN_LOG_DEBUG; grn_token_cursor *cursor; grn_obj *lazy_keyword_ids = &(highlighter->lexicon.lazy_keyword_ids); grn_obj *token_ids = &(highlighter->lexicon.token_ids); @@ -513,11 +544,13 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, 0); if (!cursor) { ERR(ctx->rc, - "[highlighter][highlight][lexicon] failed to start tokenizing: %s", + "%s failed to start tokenizing: %s", + tag, ctx->errbuf); return; } + GRN_LOG(ctx, log_level, "%s[tokenize][start]", tag); while (cursor->status == GRN_TOKEN_CURSOR_DOING) { grn_id token_id = grn_token_cursor_next(ctx, cursor); grn_highlighter_location location; @@ -534,8 +567,15 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, grn_token_get_source_first_character_length(ctx, token); location.have_overlap = grn_token_have_overlap(ctx, token); GRN_TEXT_PUT(ctx, token_locations, &location, sizeof(location)); + grn_highlighter_log_location(ctx, + log_level, + tag, + &location, + text, + text_length); } grn_token_cursor_close(ctx, cursor); + GRN_LOG(ctx, log_level, "%s[tokenize][end]", tag); { grn_obj *lexicon = highlighter->lexicon.object; @@ -555,27 +595,53 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, &keyword, NULL, NULL); + GRN_LOG(ctx, + log_level, + "%s[prefix-search][start] %" GRN_FMT_SIZE 
":<%.*s>", + tag, + i, + (int)keyword_length, + keyword); GRN_TABLE_EACH_BEGIN_MIN(ctx, lexicon, cursor, id, keyword, keyword_length, GRN_CURSOR_PREFIX) { - void *key; - int key_size; int added = 0; - key_size = grn_table_cursor_get_key(ctx, cursor, &key); { grn_encoding encoding = ctx->encoding; ctx->encoding = GRN_ENC_NONE; grn_table_add(ctx, chunks, &id, sizeof(grn_id), &added); ctx->encoding = encoding; } + if (grn_logger_pass(ctx, GRN_LOG_DEBUG)) { + void *key; + int key_size; + key_size = grn_table_cursor_get_key(ctx, cursor, &key); + GRN_LOG(ctx, + log_level, + "%s[prefix-search][%s] %" GRN_FMT_SIZE ":<%.*s>:<%.*s>", + tag, + added ? "added" : "not-added", + i, + (int)keyword_length, + keyword, + key_size, + (const char *)key); + } if (added) { GRN_RECORD_PUT(ctx, lazy_keyword_ids, id); } } GRN_TABLE_EACH_END(ctx, cursor); + GRN_LOG(ctx, + log_level, + "%s[prefix-search][end] %" GRN_FMT_SIZE ":<%.*s>", + tag, + i, + (int)keyword_length, + keyword); } } @@ -597,6 +663,11 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, for (i = 0; i < n_token_ids; i++) { grn_id chunk_id; + GRN_LOG(ctx, + log_level, + "%s[lcp-search][start] %" GRN_FMT_SIZE, + tag, + i); { grn_encoding encoding = ctx->encoding; /* token_id_chunk is a binary data */ @@ -608,6 +679,11 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, ctx->encoding = encoding; } if (chunk_id == GRN_ID_NIL) { + GRN_LOG(ctx, + log_level, + "%s[lcp-search][end][nonexistent] %" GRN_FMT_SIZE, + tag, + i); continue; } @@ -619,6 +695,9 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, grn_highlighter_location candidate; grn_highlighter_location *first = raw_token_locations + i; + candidate.have_overlap = GRN_FALSE; + candidate.first_character_length = 0; + { grn_encoding encoding = ctx->encoding; ctx->encoding = GRN_ENC_NONE; @@ -661,6 +740,17 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, } } GRN_TEXT_PUT(ctx, candidates, &candidate, sizeof(candidate)); + grn_highlighter_log_location(ctx, + log_level, + tag, + 
&candidate, + text, + text_length); + GRN_LOG(ctx, + log_level, + "%s[lcp-search][end] %" GRN_FMT_SIZE, + tag, + i); i += n_ids - 1; } } @@ -687,6 +777,11 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, grn_highlighter_location *raw_candidates = (grn_highlighter_location *)GRN_BULK_HEAD(candidates); + GRN_LOG(ctx, + log_level, + "%s[highlight][start] %" GRN_FMT_SIZE, + tag, + n_candidates); if (n_candidates == 0) { grn_text_escape_xml(ctx, output, text, text_length); } else { @@ -713,6 +808,8 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, } if (current->offset > previous->offset) { offset = grn_highlighter_highlight_lexicon_flush(ctx, + log_level, + tag, highlighter, text, text_length, @@ -723,6 +820,8 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, previous = current; } offset = grn_highlighter_highlight_lexicon_flush(ctx, + log_level, + tag, highlighter, text, text_length, @@ -736,6 +835,11 @@ grn_highlighter_highlight_lexicon(grn_ctx *ctx, text_length - offset); } } + GRN_LOG(ctx, + log_level, + "%s[highlight][end] %" GRN_FMT_SIZE, + tag, + n_candidates); } } -------------- next part -------------- HTMLの添付ファイルを保管しました... URL: https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20180807/bc4cf9bc/attachment-0001.htm