Mirror of the Vim source from https://github.com/vim/vim
Revision | 0491b9cafd44548460d5a7441531f79848fe8567 (tree) |
---|---|
Time | 2020-09-23 03:45:04 |
Author | Bram Moolenaar <Bram@vim....> |
Commiter | Bram Moolenaar |
patch 8.2.1726: fuzzy matching only works on strings
Commit: https://github.com/vim/vim/commit/4f73b8e9cc83f647b34002554a8bdf9abec0a82f
Author: Bram Moolenaar <Bram@vim.org>
Date: Tue Sep 22 20:33:50 2020 +0200
@@ -2641,7 +2641,10 @@ | ||
2641 | 2641 | matchdelete({id} [, {win}]) Number delete match identified by {id} |
2642 | 2642 | matchend({expr}, {pat} [, {start} [, {count}]]) |
2643 | 2643 | Number position where {pat} ends in {expr} |
2644 | -matchfuzzy({list}, {str}) List fuzzy match {str} in {list} | |
2644 | +matchfuzzy({list}, {str} [, {dict}]) | |
2645 | + List fuzzy match {str} in {list} | |
2646 | +matchfuzzypos({list}, {str} [, {dict}]) | |
2647 | + List fuzzy match {str} in {list} | |
2645 | 2648 | matchlist({expr}, {pat} [, {start} [, {count}]]) |
2646 | 2649 | List match and submatches of {pat} in {expr} |
2647 | 2650 | matchstr({expr}, {pat} [, {start} [, {count}]]) |
@@ -7311,12 +7314,25 @@ | ||
7311 | 7314 | GetText()->matchend('word') |
7312 | 7315 | |
7313 | 7316 | |
7314 | -matchfuzzy({list}, {str}) *matchfuzzy()* | |
7315 | - Returns a list with all the strings in {list} that fuzzy | |
7316 | - match {str}. The strings in the returned list are sorted | |
7317 | - based on the matching score. {str} is treated as a literal | |
7318 | - string and regular expression matching is NOT supported. | |
7319 | - The maximum supported {str} length is 256. | |
7317 | +matchfuzzy({list}, {str} [, {dict}]) *matchfuzzy()* | |
7318 | + If {list} is a list of strings, then returns a list with all | |
7319 | + the strings in {list} that fuzzy match {str}. The strings in | |
7320 | + the returned list are sorted based on the matching score. | |
7321 | + | |
7322 | + If {list} is a list of dictionaries, then the optional {dict} | |
7323 | + argument supports the following items: | |
7324 | + key key of the item which is fuzzy matched against | |
7325 | + {str}. The value of this item should be a | |
7326 | + string. | |
7327 | + text_cb |Funcref| that will be called for every item | |
7328 | + in {list} to get the text for fuzzy matching. | |
7329 | + This should accept a dictionary item as the | |
7330 | + argument and return the text for that item to | |
7331 | + use for fuzzy matching. | |
7332 | + | |
7333 | + {str} is treated as a literal string and regular expression | |
7334 | + matching is NOT supported. The maximum supported {str} length | |
7335 | + is 256. | |
7320 | 7336 | |
7321 | 7337 | If there are no matching strings or there is an error, then an |
7322 | 7338 | empty list is returned. If length of {str} is greater than |
@@ -7327,11 +7343,36 @@ | ||
7327 | 7343 | < results in ["clay"]. > |
7328 | 7344 | :echo getbufinfo()->map({_, v -> v.name})->matchfuzzy("ndl") |
7329 | 7345 | < results in a list of buffer names fuzzy matching "ndl". > |
7346 | + :echo getbufinfo()->matchfuzzy("ndl", {'key' : 'name'}) | |
7347 | +< results in a list of buffer information dicts with buffer | |
7348 | + names fuzzy matching "ndl". > | |
7349 | + :echo getbufinfo()->matchfuzzy("spl", | |
7350 | + \ {'text_cb' : {v -> v.name}}) | |
7351 | +< results in a list of buffer information dicts with buffer | |
7352 | + names fuzzy matching "spl". > | |
7330 | 7353 | :echo v:oldfiles->matchfuzzy("test") |
7331 | 7354 | < results in a list of file names fuzzy matching "test". > |
7332 | 7355 | :let l = readfile("buffer.c")->matchfuzzy("str") |
7333 | 7356 | < results in a list of lines in "buffer.c" fuzzy matching "str". |
7334 | 7357 | |
7358 | +matchfuzzypos({list}, {str} [, {dict}]) *matchfuzzypos()* | |
7359 | + Same as |matchfuzzy()|, but returns the list of matched | |
7360 | + strings and the list of character positions where characters | |
7361 | + in {str} matches. | |
7362 | + | |
7363 | + If {str} matches multiple times in a string, then only the | |
7364 | + positions for the best match is returned. | |
7365 | + | |
7366 | + If there are no matching strings or there is an error, then a | |
7367 | + list with two empty list items is returned. | |
7368 | + | |
7369 | + Example: > | |
7370 | + :echo matchfuzzypos(['testing'], 'tsg') | |
7371 | +< results in [['testing'], [[0, 2, 6]]] > | |
7372 | + :echo matchfuzzypos(['clay', 'lacy'], 'la') | |
7373 | +< results in [['lacy', 'clay'], [[0, 1], [1, 2]]] > | |
7374 | + :echo [{'text': 'hello', 'id' : 10}]->matchfuzzypos('ll', {'key' : 'text'}) | |
7375 | +< results in [{'id': 10, 'text': 'hello'}] [[2, 3]] | |
7335 | 7376 | |
7336 | 7377 | matchlist({expr}, {pat} [, {start} [, {count}]]) *matchlist()* |
7337 | 7378 | Same as |match()|, but return a |List|. The first item in the |
@@ -604,6 +604,7 @@ | ||
604 | 604 | match() position where a pattern matches in a string |
605 | 605 | matchend() position where a pattern match ends in a string |
606 | 606 | matchfuzzy() fuzzy matches a string in a list of strings |
607 | + matchfuzzypos() fuzzy matches a string in a list of strings | |
607 | 608 | matchstr() match of a pattern in a string |
608 | 609 | matchstrpos() match and positions of a pattern in a string |
609 | 610 | matchlist() like matchstr() and also return submatches |
@@ -753,7 +753,8 @@ | ||
753 | 753 | {"matcharg", 1, 1, FEARG_1, ret_list_string, f_matcharg}, |
754 | 754 | {"matchdelete", 1, 2, FEARG_1, ret_number, f_matchdelete}, |
755 | 755 | {"matchend", 2, 4, FEARG_1, ret_number, f_matchend}, |
756 | - {"matchfuzzy", 2, 2, FEARG_1, ret_list_string, f_matchfuzzy}, | |
756 | + {"matchfuzzy", 2, 3, FEARG_1, ret_list_string, f_matchfuzzy}, | |
757 | + {"matchfuzzypos", 2, 3, FEARG_1, ret_list_any, f_matchfuzzypos}, | |
757 | 758 | {"matchlist", 2, 4, FEARG_1, ret_list_string, f_matchlist}, |
758 | 759 | {"matchstr", 2, 4, FEARG_1, ret_string, f_matchstr}, |
759 | 760 | {"matchstrpos", 2, 4, FEARG_1, ret_list_any, f_matchstrpos}, |
@@ -37,4 +37,5 @@ | ||
37 | 37 | int get_spat_last_idx(void); |
38 | 38 | void f_searchcount(typval_T *argvars, typval_T *rettv); |
39 | 39 | void f_matchfuzzy(typval_T *argvars, typval_T *rettv); |
40 | +void f_matchfuzzypos(typval_T *argvars, typval_T *rettv); | |
40 | 41 | /* vim: set ft=c : */ |
@@ -4216,33 +4216,144 @@ | ||
4216 | 4216 | */ |
4217 | 4217 | typedef struct |
4218 | 4218 | { |
4219 | - listitem_T *item; | |
4219 | + listitem_T *item; | |
4220 | 4220 | int score; |
4221 | + list_T *lmatchpos; | |
4221 | 4222 | } fuzzyItem_T; |
4222 | 4223 | |
4224 | +// bonus for adjacent matches | |
4225 | +#define SEQUENTIAL_BONUS 15 | |
4226 | +// bonus if match occurs after a separator | |
4227 | +#define SEPARATOR_BONUS 30 | |
4228 | +// bonus if match is uppercase and prev is lower | |
4229 | +#define CAMEL_BONUS 30 | |
4230 | +// bonus if the first letter is matched | |
4231 | +#define FIRST_LETTER_BONUS 15 | |
4232 | +// penalty applied for every letter in str before the first match | |
4233 | +#define LEADING_LETTER_PENALTY -5 | |
4234 | +// maximum penalty for leading letters | |
4235 | +#define MAX_LEADING_LETTER_PENALTY -15 | |
4236 | +// penalty for every letter that doesn't match | |
4237 | +#define UNMATCHED_LETTER_PENALTY -1 | |
4238 | +// Score for a string that doesn't fuzzy match the pattern | |
4239 | +#define SCORE_NONE -9999 | |
4240 | + | |
4241 | +#define FUZZY_MATCH_RECURSION_LIMIT 10 | |
4242 | +// Maximum number of characters that can be fuzzy matched | |
4243 | +#define MAXMATCHES 256 | |
4244 | + | |
4245 | +typedef int_u matchidx_T; | |
4246 | + | |
4247 | +/* | |
4248 | + * Compute a score for a fuzzy matched string. The matching character locations | |
4249 | + * are in 'matches'. | |
4250 | + */ | |
4251 | + static int | |
4252 | +fuzzy_match_compute_score( | |
4253 | + char_u *str, | |
4254 | + int strSz, | |
4255 | + matchidx_T *matches, | |
4256 | + int numMatches) | |
4257 | +{ | |
4258 | + int score; | |
4259 | + int penalty; | |
4260 | + int unmatched; | |
4261 | + int i; | |
4262 | + char_u *p = str; | |
4263 | + matchidx_T sidx = 0; | |
4264 | + | |
4265 | + // Initialize score | |
4266 | + score = 100; | |
4267 | + | |
4268 | + // Apply leading letter penalty | |
4269 | + penalty = LEADING_LETTER_PENALTY * matches[0]; | |
4270 | + if (penalty < MAX_LEADING_LETTER_PENALTY) | |
4271 | + penalty = MAX_LEADING_LETTER_PENALTY; | |
4272 | + score += penalty; | |
4273 | + | |
4274 | + // Apply unmatched penalty | |
4275 | + unmatched = strSz - numMatches; | |
4276 | + score += UNMATCHED_LETTER_PENALTY * unmatched; | |
4277 | + | |
4278 | + // Apply ordering bonuses | |
4279 | + for (i = 0; i < numMatches; ++i) | |
4280 | + { | |
4281 | + matchidx_T currIdx = matches[i]; | |
4282 | + | |
4283 | + if (i > 0) | |
4284 | + { | |
4285 | + matchidx_T prevIdx = matches[i - 1]; | |
4286 | + | |
4287 | + // Sequential | |
4288 | + if (currIdx == (prevIdx + 1)) | |
4289 | + score += SEQUENTIAL_BONUS; | |
4290 | + } | |
4291 | + | |
4292 | + // Check for bonuses based on neighbor character value | |
4293 | + if (currIdx > 0) | |
4294 | + { | |
4295 | + // Camel case | |
4296 | + int neighbor; | |
4297 | + int curr; | |
4298 | + int neighborSeparator; | |
4299 | + | |
4300 | + if (has_mbyte) | |
4301 | + { | |
4302 | + while (sidx < currIdx) | |
4303 | + { | |
4304 | + neighbor = (*mb_ptr2char)(p); | |
4305 | + (void)mb_ptr2char_adv(&p); | |
4306 | + sidx++; | |
4307 | + } | |
4308 | + curr = (*mb_ptr2char)(p); | |
4309 | + } | |
4310 | + else | |
4311 | + { | |
4312 | + neighbor = str[currIdx - 1]; | |
4313 | + curr = str[currIdx]; | |
4314 | + } | |
4315 | + | |
4316 | + if (vim_islower(neighbor) && vim_isupper(curr)) | |
4317 | + score += CAMEL_BONUS; | |
4318 | + | |
4319 | + // Separator | |
4320 | + neighborSeparator = neighbor == '_' || neighbor == ' '; | |
4321 | + if (neighborSeparator) | |
4322 | + score += SEPARATOR_BONUS; | |
4323 | + } | |
4324 | + else | |
4325 | + { | |
4326 | + // First letter | |
4327 | + score += FIRST_LETTER_BONUS; | |
4328 | + } | |
4329 | + } | |
4330 | + return score; | |
4331 | +} | |
4332 | + | |
4223 | 4333 | static int |
4224 | 4334 | fuzzy_match_recursive( |
4225 | - char_u *fuzpat, | |
4226 | - char_u *str, | |
4227 | - int *outScore, | |
4228 | - char_u *strBegin, | |
4229 | - char_u *srcMatches, | |
4230 | - char_u *matches, | |
4231 | - int maxMatches, | |
4232 | - int nextMatch, | |
4233 | - int *recursionCount, | |
4234 | - int recursionLimit) | |
4335 | + char_u *fuzpat, | |
4336 | + char_u *str, | |
4337 | + matchidx_T strIdx, | |
4338 | + int *outScore, | |
4339 | + char_u *strBegin, | |
4340 | + int strLen, | |
4341 | + matchidx_T *srcMatches, | |
4342 | + matchidx_T *matches, | |
4343 | + int maxMatches, | |
4344 | + int nextMatch, | |
4345 | + int *recursionCount) | |
4235 | 4346 | { |
4236 | 4347 | // Recursion params |
4237 | 4348 | int recursiveMatch = FALSE; |
4238 | - char_u bestRecursiveMatches[256]; | |
4349 | + matchidx_T bestRecursiveMatches[MAXMATCHES]; | |
4239 | 4350 | int bestRecursiveScore = 0; |
4240 | 4351 | int first_match; |
4241 | 4352 | int matched; |
4242 | 4353 | |
4243 | 4354 | // Count recursions |
4244 | 4355 | ++*recursionCount; |
4245 | - if (*recursionCount >= recursionLimit) | |
4356 | + if (*recursionCount >= FUZZY_MATCH_RECURSION_LIMIT) | |
4246 | 4357 | return FALSE; |
4247 | 4358 | |
4248 | 4359 | // Detect end of strings |
@@ -4251,13 +4362,20 @@ | ||
4251 | 4362 | |
4252 | 4363 | // Loop through fuzpat and str looking for a match |
4253 | 4364 | first_match = TRUE; |
4254 | - while (*fuzpat != '\0' && *str != '\0') | |
4365 | + while (*fuzpat != NUL && *str != NUL) | |
4255 | 4366 | { |
4367 | + int c1; | |
4368 | + int c2; | |
4369 | + | |
4370 | + c1 = PTR2CHAR(fuzpat); | |
4371 | + c2 = PTR2CHAR(str); | |
4372 | + | |
4256 | 4373 | // Found match |
4257 | - if (vim_tolower(*fuzpat) == vim_tolower(*str)) | |
4374 | + if (vim_tolower(c1) == vim_tolower(c2)) | |
4258 | 4375 | { |
4259 | - char_u recursiveMatches[256]; | |
4376 | + matchidx_T recursiveMatches[MAXMATCHES]; | |
4260 | 4377 | int recursiveScore = 0; |
4378 | + char_u *next_char; | |
4261 | 4379 | |
4262 | 4380 | // Supplied matches buffer was too short |
4263 | 4381 | if (nextMatch >= maxMatches) |
@@ -4266,116 +4384,58 @@ | ||
4266 | 4384 | // "Copy-on-Write" srcMatches into matches |
4267 | 4385 | if (first_match && srcMatches) |
4268 | 4386 | { |
4269 | - memcpy(matches, srcMatches, nextMatch); | |
4387 | + memcpy(matches, srcMatches, nextMatch * sizeof(srcMatches[0])); | |
4270 | 4388 | first_match = FALSE; |
4271 | 4389 | } |
4272 | 4390 | |
4273 | 4391 | // Recursive call that "skips" this match |
4274 | - if (fuzzy_match_recursive(fuzpat, str + 1, &recursiveScore, | |
4275 | - strBegin, matches, recursiveMatches, | |
4276 | - sizeof(recursiveMatches), nextMatch, recursionCount, | |
4277 | - recursionLimit)) | |
4392 | + if (has_mbyte) | |
4393 | + next_char = str + (*mb_ptr2len)(str); | |
4394 | + else | |
4395 | + next_char = str + 1; | |
4396 | + if (fuzzy_match_recursive(fuzpat, next_char, strIdx + 1, | |
4397 | + &recursiveScore, strBegin, strLen, matches, | |
4398 | + recursiveMatches, | |
4399 | + sizeof(recursiveMatches)/sizeof(recursiveMatches[0]), | |
4400 | + nextMatch, recursionCount)) | |
4278 | 4401 | { |
4279 | 4402 | // Pick best recursive score |
4280 | 4403 | if (!recursiveMatch || recursiveScore > bestRecursiveScore) |
4281 | 4404 | { |
4282 | - memcpy(bestRecursiveMatches, recursiveMatches, 256); | |
4405 | + memcpy(bestRecursiveMatches, recursiveMatches, | |
4406 | + MAXMATCHES * sizeof(recursiveMatches[0])); | |
4283 | 4407 | bestRecursiveScore = recursiveScore; |
4284 | 4408 | } |
4285 | 4409 | recursiveMatch = TRUE; |
4286 | 4410 | } |
4287 | 4411 | |
4288 | 4412 | // Advance |
4289 | - matches[nextMatch++] = (char_u)(str - strBegin); | |
4290 | - ++fuzpat; | |
4413 | + matches[nextMatch++] = strIdx; | |
4414 | + if (has_mbyte) | |
4415 | + (void)mb_ptr2char_adv(&fuzpat); | |
4416 | + else | |
4417 | + ++fuzpat; | |
4291 | 4418 | } |
4292 | - ++str; | |
4419 | + if (has_mbyte) | |
4420 | + (void)mb_ptr2char_adv(&str); | |
4421 | + else | |
4422 | + ++str; | |
4423 | + strIdx++; | |
4293 | 4424 | } |
4294 | 4425 | |
4295 | 4426 | // Determine if full fuzpat was matched |
4296 | - matched = *fuzpat == '\0' ? TRUE : FALSE; | |
4427 | + matched = *fuzpat == NUL ? TRUE : FALSE; | |
4297 | 4428 | |
4298 | 4429 | // Calculate score |
4299 | 4430 | if (matched) |
4300 | - { | |
4301 | - // bonus for adjacent matches | |
4302 | - int sequential_bonus = 15; | |
4303 | - // bonus if match occurs after a separator | |
4304 | - int separator_bonus = 30; | |
4305 | - // bonus if match is uppercase and prev is lower | |
4306 | - int camel_bonus = 30; | |
4307 | - // bonus if the first letter is matched | |
4308 | - int first_letter_bonus = 15; | |
4309 | - // penalty applied for every letter in str before the first match | |
4310 | - int leading_letter_penalty = -5; | |
4311 | - // maximum penalty for leading letters | |
4312 | - int max_leading_letter_penalty = -15; | |
4313 | - // penalty for every letter that doesn't matter | |
4314 | - int unmatched_letter_penalty = -1; | |
4315 | - int penalty; | |
4316 | - int unmatched; | |
4317 | - int i; | |
4318 | - | |
4319 | - // Iterate str to end | |
4320 | - while (*str != '\0') | |
4321 | - ++str; | |
4322 | - | |
4323 | - // Initialize score | |
4324 | - *outScore = 100; | |
4325 | - | |
4326 | - // Apply leading letter penalty | |
4327 | - penalty = leading_letter_penalty * matches[0]; | |
4328 | - if (penalty < max_leading_letter_penalty) | |
4329 | - penalty = max_leading_letter_penalty; | |
4330 | - *outScore += penalty; | |
4331 | - | |
4332 | - // Apply unmatched penalty | |
4333 | - unmatched = (int)(str - strBegin) - nextMatch; | |
4334 | - *outScore += unmatched_letter_penalty * unmatched; | |
4335 | - | |
4336 | - // Apply ordering bonuses | |
4337 | - for (i = 0; i < nextMatch; ++i) | |
4338 | - { | |
4339 | - char_u currIdx = matches[i]; | |
4340 | - | |
4341 | - if (i > 0) | |
4342 | - { | |
4343 | - char_u prevIdx = matches[i - 1]; | |
4344 | - | |
4345 | - // Sequential | |
4346 | - if (currIdx == (prevIdx + 1)) | |
4347 | - *outScore += sequential_bonus; | |
4348 | - } | |
4349 | - | |
4350 | - // Check for bonuses based on neighbor character value | |
4351 | - if (currIdx > 0) | |
4352 | - { | |
4353 | - // Camel case | |
4354 | - char_u neighbor = strBegin[currIdx - 1]; | |
4355 | - char_u curr = strBegin[currIdx]; | |
4356 | - int neighborSeparator; | |
4357 | - | |
4358 | - if (islower(neighbor) && isupper(curr)) | |
4359 | - *outScore += camel_bonus; | |
4360 | - | |
4361 | - // Separator | |
4362 | - neighborSeparator = neighbor == '_' || neighbor == ' '; | |
4363 | - if (neighborSeparator) | |
4364 | - *outScore += separator_bonus; | |
4365 | - } | |
4366 | - else | |
4367 | - { | |
4368 | - // First letter | |
4369 | - *outScore += first_letter_bonus; | |
4370 | - } | |
4371 | - } | |
4372 | - } | |
4431 | + *outScore = fuzzy_match_compute_score(strBegin, strLen, matches, | |
4432 | + nextMatch); | |
4373 | 4433 | |
4374 | 4434 | // Return best result |
4375 | 4435 | if (recursiveMatch && (!matched || bestRecursiveScore > *outScore)) |
4376 | 4436 | { |
4377 | 4437 | // Recursive score is better than "this" |
4378 | - memcpy(matches, bestRecursiveMatches, maxMatches); | |
4438 | + memcpy(matches, bestRecursiveMatches, maxMatches * sizeof(matches[0])); | |
4379 | 4439 | *outScore = bestRecursiveScore; |
4380 | 4440 | return TRUE; |
4381 | 4441 | } |
@@ -4394,22 +4454,27 @@ | ||
4394 | 4454 | * normalized and varies with pattern. |
4395 | 4455 | * Recursion is limited internally (default=10) to prevent degenerate cases |
4396 | 4456 | * (fuzpat="aaaaaa" str="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"). |
4397 | - * Uses char_u for match indices. Therefore patterns are limited to 256 | |
4457 | + * Uses char_u for match indices. Therefore patterns are limited to MAXMATCHES | |
4398 | 4458 | * characters. |
4399 | 4459 | * |
4400 | - * Returns TRUE if fuzpat is found AND calculates a score. | |
4460 | + * Returns TRUE if 'fuzpat' matches 'str'. Also returns the match score in | |
4461 | + * 'outScore' and the matching character positions in 'matches'. | |
4401 | 4462 | */ |
4402 | 4463 | static int |
4403 | -fuzzy_match(char_u *str, char_u *fuzpat, int *outScore) | |
4464 | +fuzzy_match( | |
4465 | + char_u *str, | |
4466 | + char_u *fuzpat, | |
4467 | + int *outScore, | |
4468 | + matchidx_T *matches, | |
4469 | + int maxMatches) | |
4404 | 4470 | { |
4405 | - char_u matches[256]; | |
4406 | 4471 | int recursionCount = 0; |
4407 | - int recursionLimit = 10; | |
4472 | + int len = MB_CHARLEN(str); | |
4408 | 4473 | |
4409 | 4474 | *outScore = 0; |
4410 | 4475 | |
4411 | - return fuzzy_match_recursive(fuzpat, str, outScore, str, NULL, matches, | |
4412 | - sizeof(matches), 0, &recursionCount, recursionLimit); | |
4476 | + return fuzzy_match_recursive(fuzpat, str, 0, outScore, str, len, NULL, | |
4477 | + matches, maxMatches, 0, &recursionCount); | |
4413 | 4478 | } |
4414 | 4479 | |
4415 | 4480 | /* |
@@ -4425,84 +4490,269 @@ | ||
4425 | 4490 | } |
4426 | 4491 | |
4427 | 4492 | /* |
4428 | - * Fuzzy search the string 'str' in 'strlist' and return the matching strings | |
4429 | - * in 'fmatchlist'. | |
4493 | + * Fuzzy search the string 'str' in a list of 'items' and return the matching | |
4494 | + * strings in 'fmatchlist'. | |
4495 | + * If 'items' is a list of strings, then search for 'str' in the list. | |
4496 | + * If 'items' is a list of dicts, then either use 'key' to lookup the string | |
4497 | + * for each item or use 'item_cb' Funcref function to get the string. | |
4498 | + * If 'retmatchpos' is TRUE, then return a list of positions where 'str' | |
4499 | + * matches for each item. | |
4430 | 4500 | */ |
4431 | 4501 | static void |
4432 | -match_fuzzy(list_T *strlist, char_u *str, list_T *fmatchlist) | |
4502 | +match_fuzzy( | |
4503 | + list_T *items, | |
4504 | + char_u *str, | |
4505 | + char_u *key, | |
4506 | + callback_T *item_cb, | |
4507 | + int retmatchpos, | |
4508 | + list_T *fmatchlist) | |
4433 | 4509 | { |
4434 | 4510 | long len; |
4435 | 4511 | fuzzyItem_T *ptrs; |
4436 | 4512 | listitem_T *li; |
4437 | 4513 | long i = 0; |
4438 | 4514 | int found_match = FALSE; |
4439 | - | |
4440 | - len = list_len(strlist); | |
4515 | + matchidx_T matches[MAXMATCHES]; | |
4516 | + | |
4517 | + len = list_len(items); | |
4441 | 4518 | if (len == 0) |
4442 | 4519 | return; |
4443 | 4520 | |
4444 | - ptrs = ALLOC_MULT(fuzzyItem_T, len); | |
4521 | + ptrs = ALLOC_CLEAR_MULT(fuzzyItem_T, len); | |
4445 | 4522 | if (ptrs == NULL) |
4446 | 4523 | return; |
4447 | 4524 | |
4448 | - // For all the string items in strlist, get the fuzzy matching score | |
4449 | - FOR_ALL_LIST_ITEMS(strlist, li) | |
4525 | + // For all the string items in items, get the fuzzy matching score | |
4526 | + FOR_ALL_LIST_ITEMS(items, li) | |
4450 | 4527 | { |
4451 | - int score; | |
4528 | + int score; | |
4529 | + char_u *itemstr; | |
4530 | + typval_T rettv; | |
4452 | 4531 | |
4453 | 4532 | ptrs[i].item = li; |
4454 | - ptrs[i].score = -9999; | |
4455 | - // ignore non-string items in the list | |
4456 | - if (li->li_tv.v_type == VAR_STRING && li->li_tv.vval.v_string != NULL) | |
4457 | - if (fuzzy_match(li->li_tv.vval.v_string, str, &score)) | |
4533 | + ptrs[i].score = SCORE_NONE; | |
4534 | + itemstr = NULL; | |
4535 | + rettv.v_type = VAR_UNKNOWN; | |
4536 | + if (li->li_tv.v_type == VAR_STRING) // list of strings | |
4537 | + itemstr = li->li_tv.vval.v_string; | |
4538 | + else if (li->li_tv.v_type == VAR_DICT && | |
4539 | + (key != NULL || item_cb->cb_name != NULL)) | |
4540 | + { | |
4541 | + // For a dict, either use the specified key to lookup the string or | |
4542 | + // use the specified callback function to get the string. | |
4543 | + if (key != NULL) | |
4544 | + itemstr = dict_get_string(li->li_tv.vval.v_dict, key, FALSE); | |
4545 | + else | |
4458 | 4546 | { |
4459 | - ptrs[i].score = score; | |
4460 | - found_match = TRUE; | |
4547 | + typval_T argv[2]; | |
4548 | + | |
4549 | + // Invoke the supplied callback (if any) to get the dict item | |
4550 | + li->li_tv.vval.v_dict->dv_refcount++; | |
4551 | + argv[0].v_type = VAR_DICT; | |
4552 | + argv[0].vval.v_dict = li->li_tv.vval.v_dict; | |
4553 | + argv[1].v_type = VAR_UNKNOWN; | |
4554 | + if (call_callback(item_cb, -1, &rettv, 1, argv) != FAIL) | |
4555 | + { | |
4556 | + if (rettv.v_type == VAR_STRING) | |
4557 | + itemstr = rettv.vval.v_string; | |
4558 | + } | |
4559 | + dict_unref(li->li_tv.vval.v_dict); | |
4461 | 4560 | } |
4561 | + } | |
4562 | + | |
4563 | + if (itemstr != NULL | |
4564 | + && fuzzy_match(itemstr, str, &score, matches, | |
4565 | + sizeof(matches) / sizeof(matches[0]))) | |
4566 | + { | |
4567 | + // Copy the list of matching positions in itemstr to a list, if | |
4568 | + // 'retmatchpos' is set. | |
4569 | + if (retmatchpos) | |
4570 | + { | |
4571 | + int j; | |
4572 | + int strsz; | |
4573 | + | |
4574 | + ptrs[i].lmatchpos = list_alloc(); | |
4575 | + if (ptrs[i].lmatchpos == NULL) | |
4576 | + goto done; | |
4577 | + strsz = MB_CHARLEN(str); | |
4578 | + for (j = 0; j < strsz; j++) | |
4579 | + { | |
4580 | + if (list_append_number(ptrs[i].lmatchpos, | |
4581 | + matches[j]) == FAIL) | |
4582 | + goto done; | |
4583 | + } | |
4584 | + } | |
4585 | + ptrs[i].score = score; | |
4586 | + found_match = TRUE; | |
4587 | + } | |
4462 | 4588 | ++i; |
4589 | + clear_tv(&rettv); | |
4463 | 4590 | } |
4464 | 4591 | |
4465 | 4592 | if (found_match) |
4466 | 4593 | { |
4594 | + list_T *l; | |
4595 | + | |
4467 | 4596 | // Sort the list by the descending order of the match score |
4468 | 4597 | qsort((void *)ptrs, (size_t)len, sizeof(fuzzyItem_T), |
4469 | 4598 | fuzzy_item_compare); |
4470 | 4599 | |
4471 | - // Copy the matching strings with 'score != -9999' to the return list | |
4600 | + // For matchfuzzy(), return a list of matched strings. | |
4601 | + // ['str1', 'str2', 'str3'] | |
4602 | + // For matchfuzzypos(), return a list with two items. | |
4603 | + // The first item is a list of matched strings. The second item | |
4604 | + // is a list of lists where each list item is a list of matched | |
4605 | + // character positions. | |
4606 | + // [['str1', 'str2', 'str3'], [[1, 3], [1, 3], [1, 3]]] | |
4607 | + if (retmatchpos) | |
4608 | + { | |
4609 | + li = list_find(fmatchlist, 0); | |
4610 | + if (li == NULL || li->li_tv.vval.v_list == NULL) | |
4611 | + goto done; | |
4612 | + l = li->li_tv.vval.v_list; | |
4613 | + } | |
4614 | + else | |
4615 | + l = fmatchlist; | |
4616 | + | |
4617 | + // Copy the matching strings with a valid score to the return list | |
4472 | 4618 | for (i = 0; i < len; i++) |
4473 | 4619 | { |
4474 | - if (ptrs[i].score == -9999) | |
4620 | + if (ptrs[i].score == SCORE_NONE) | |
4475 | 4621 | break; |
4476 | - list_append_string(fmatchlist, ptrs[i].item->li_tv.vval.v_string, | |
4477 | - -1); | |
4622 | + list_append_tv(l, &ptrs[i].item->li_tv); | |
4623 | + } | |
4624 | + | |
4625 | + // next copy the list of matching positions | |
4626 | + if (retmatchpos) | |
4627 | + { | |
4628 | + li = list_find(fmatchlist, -1); | |
4629 | + if (li == NULL || li->li_tv.vval.v_list == NULL) | |
4630 | + goto done; | |
4631 | + l = li->li_tv.vval.v_list; | |
4632 | + | |
4633 | + for (i = 0; i < len; i++) | |
4634 | + { | |
4635 | + if (ptrs[i].score == SCORE_NONE) | |
4636 | + break; | |
4637 | + if (ptrs[i].lmatchpos != NULL && | |
4638 | + list_append_list(l, ptrs[i].lmatchpos) == FAIL) | |
4639 | + goto done; | |
4640 | + } | |
4478 | 4641 | } |
4479 | 4642 | } |
4480 | 4643 | |
4644 | +done: | |
4481 | 4645 | vim_free(ptrs); |
4482 | 4646 | } |
4483 | 4647 | |
4484 | 4648 | /* |
4649 | + * Do fuzzy matching. Returns the list of matched strings in 'rettv'. | |
4650 | + * If 'retmatchpos' is TRUE, also returns the matching character positions. | |
4651 | + */ | |
4652 | + static void | |
4653 | +do_fuzzymatch(typval_T *argvars, typval_T *rettv, int retmatchpos) | |
4654 | +{ | |
4655 | + callback_T cb; | |
4656 | + char_u *key = NULL; | |
4657 | + int ret; | |
4658 | + | |
4659 | + CLEAR_POINTER(&cb); | |
4660 | + | |
4661 | + // validate and get the arguments | |
4662 | + if (argvars[0].v_type != VAR_LIST || argvars[0].vval.v_list == NULL) | |
4663 | + { | |
4664 | + semsg(_(e_listarg), retmatchpos ? "matchfuzzypos()" : "matchfuzzy()"); | |
4665 | + return; | |
4666 | + } | |
4667 | + if (argvars[1].v_type != VAR_STRING | |
4668 | + || argvars[1].vval.v_string == NULL) | |
4669 | + { | |
4670 | + semsg(_(e_invarg2), tv_get_string(&argvars[1])); | |
4671 | + return; | |
4672 | + } | |
4673 | + | |
4674 | + if (argvars[2].v_type != VAR_UNKNOWN) | |
4675 | + { | |
4676 | + dict_T *d; | |
4677 | + dictitem_T *di; | |
4678 | + | |
4679 | + if (argvars[2].v_type != VAR_DICT || argvars[2].vval.v_dict == NULL) | |
4680 | + { | |
4681 | + emsg(_(e_dictreq)); | |
4682 | + return; | |
4683 | + } | |
4684 | + | |
4685 | + // To search a dict, either a callback function or a key can be | |
4686 | + // specified. | |
4687 | + d = argvars[2].vval.v_dict; | |
4688 | + if ((di = dict_find(d, (char_u *)"key", -1)) != NULL) | |
4689 | + { | |
4690 | + if (di->di_tv.v_type != VAR_STRING | |
4691 | + || di->di_tv.vval.v_string == NULL | |
4692 | + || *di->di_tv.vval.v_string == NUL) | |
4693 | + { | |
4694 | + semsg(_(e_invarg2), tv_get_string(&di->di_tv)); | |
4695 | + return; | |
4696 | + } | |
4697 | + key = tv_get_string(&di->di_tv); | |
4698 | + } | |
4699 | + else if ((di = dict_find(d, (char_u *)"text_cb", -1)) != NULL) | |
4700 | + { | |
4701 | + cb = get_callback(&di->di_tv); | |
4702 | + if (cb.cb_name == NULL) | |
4703 | + { | |
4704 | + semsg(_(e_invargval), "text_cb"); | |
4705 | + return; | |
4706 | + } | |
4707 | + } | |
4708 | + } | |
4709 | + | |
4710 | + // get the fuzzy matches | |
4711 | + ret = rettv_list_alloc(rettv); | |
4712 | + if (ret != OK) | |
4713 | + goto done; | |
4714 | + if (retmatchpos) | |
4715 | + { | |
4716 | + list_T *l; | |
4717 | + | |
4718 | + // For matchfuzzypos(), a list with two items are returned. First item | |
4719 | + // is a list of matching strings and the second item is a list of | |
4720 | + // lists with matching positions within each string. | |
4721 | + l = list_alloc(); | |
4722 | + if (l == NULL) | |
4723 | + goto done; | |
4724 | + if (list_append_list(rettv->vval.v_list, l) == FAIL) | |
4725 | + goto done; | |
4726 | + l = list_alloc(); | |
4727 | + if (l == NULL) | |
4728 | + goto done; | |
4729 | + if (list_append_list(rettv->vval.v_list, l) == FAIL) | |
4730 | + goto done; | |
4731 | + } | |
4732 | + | |
4733 | + match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), key, | |
4734 | + &cb, retmatchpos, rettv->vval.v_list); | |
4735 | + | |
4736 | +done: | |
4737 | + free_callback(&cb); | |
4738 | +} | |
4739 | + | |
4740 | +/* | |
4485 | 4741 | * "matchfuzzy()" function |
4486 | 4742 | */ |
4487 | 4743 | void |
4488 | 4744 | f_matchfuzzy(typval_T *argvars, typval_T *rettv) |
4489 | 4745 | { |
4490 | - if (argvars[0].v_type != VAR_LIST) | |
4491 | - { | |
4492 | - emsg(_(e_listreq)); | |
4493 | - return; | |
4494 | - } | |
4495 | - if (argvars[0].vval.v_list == NULL) | |
4496 | - return; | |
4497 | - if (argvars[1].v_type != VAR_STRING | |
4498 | - || argvars[1].vval.v_string == NULL) | |
4499 | - { | |
4500 | - semsg(_(e_invarg2), tv_get_string(&argvars[1])); | |
4501 | - return; | |
4502 | - } | |
4503 | - if (rettv_list_alloc(rettv) == OK) | |
4504 | - match_fuzzy(argvars[0].vval.v_list, tv_get_string(&argvars[1]), | |
4505 | - rettv->vval.v_list); | |
4746 | + do_fuzzymatch(argvars, rettv, FALSE); | |
4747 | +} | |
4748 | + | |
4749 | +/* | |
4750 | + * "matchfuzzypos()" function | |
4751 | + */ | |
4752 | + void | |
4753 | +f_matchfuzzypos(typval_T *argvars, typval_T *rettv) | |
4754 | +{ | |
4755 | + do_fuzzymatch(argvars, rettv, TRUE); | |
4506 | 4756 | } |
4507 | 4757 | |
4508 | 4758 | #endif |
@@ -184,6 +184,7 @@ | ||
184 | 184 | test_match \ |
185 | 185 | test_matchadd_conceal \ |
186 | 186 | test_matchadd_conceal_utf8 \ |
187 | + test_matchfuzzy \ | |
187 | 188 | test_memory_usage \ |
188 | 189 | test_menu \ |
189 | 190 | test_messages \ |
@@ -420,6 +421,7 @@ | ||
420 | 421 | test_match.res \ |
421 | 422 | test_matchadd_conceal.res \ |
422 | 423 | test_matchadd_conceal_utf8.res \ |
424 | + test_matchfuzzy.res \ | |
423 | 425 | test_memory_usage.res \ |
424 | 426 | test_menu.res \ |
425 | 427 | test_messages.res \ |
@@ -2554,28 +2554,4 @@ | ||
2554 | 2554 | call assert_fails('call browsedir("open", [])', 'E730:') |
2555 | 2555 | endfunc |
2556 | 2556 | |
2557 | -" Test for matchfuzzy() | |
2558 | -func Test_matchfuzzy() | |
2559 | - call assert_fails('call matchfuzzy(10, "abc")', 'E714:') | |
2560 | - call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') | |
2561 | - call assert_equal([], matchfuzzy([], 'abc')) | |
2562 | - call assert_equal([], matchfuzzy(['abc'], '')) | |
2563 | - call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) | |
2564 | - call assert_equal([], matchfuzzy([10, 20], 'ac')) | |
2565 | - call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) | |
2566 | - call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) | |
2567 | - call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) | |
2568 | - call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) | |
2569 | - call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) | |
2570 | - call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) | |
2571 | - call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) | |
2572 | - call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) | |
2573 | - | |
2574 | - %bw! | |
2575 | - eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) | |
2576 | - let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') | |
2577 | - call assert_equal(1, len(l)) | |
2578 | - call assert_match('needle', l[0]) | |
2579 | -endfunc | |
2580 | - | |
2581 | 2557 | " vim: shiftwidth=2 sts=2 expandtab |
@@ -0,0 +1,188 @@ | ||
1 | +" Tests for fuzzy matching | |
2 | + | |
3 | +source shared.vim | |
4 | +source check.vim | |
5 | + | |
6 | +" Test for matchfuzzy() | |
7 | +func Test_matchfuzzy() | |
8 | + call assert_fails('call matchfuzzy(10, "abc")', 'E686:') | |
9 | + call assert_fails('call matchfuzzy(["abc"], [])', 'E730:') | |
10 | + call assert_fails("let x = matchfuzzy(test_null_list(), 'foo')", 'E686:') | |
11 | + call assert_fails('call matchfuzzy(["abc"], test_null_string())', 'E475:') | |
12 | + call assert_equal([], matchfuzzy([], 'abc')) | |
13 | + call assert_equal([], matchfuzzy(['abc'], '')) | |
14 | + call assert_equal(['abc'], matchfuzzy(['abc', 10], 'ac')) | |
15 | + call assert_equal([], matchfuzzy([10, 20], 'ac')) | |
16 | + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) | |
17 | + call assert_equal(['crayon', 'camera'], matchfuzzy(['camera', 'crayon'], 'cra')) | |
18 | + call assert_equal(['aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa', 'aba'], matchfuzzy(['aba', 'aabbaa', 'aaabbbaaa', 'aaaabbbbaaaa'], 'aa')) | |
19 | + call assert_equal(['one'], matchfuzzy(['one', 'two'], 'one')) | |
20 | + call assert_equal(['oneTwo', 'onetwo'], matchfuzzy(['onetwo', 'oneTwo'], 'oneTwo')) | |
21 | + call assert_equal(['one_two', 'onetwo'], matchfuzzy(['onetwo', 'one_two'], 'oneTwo')) | |
22 | + call assert_equal(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], matchfuzzy(['aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'], 'aa')) | |
23 | + call assert_equal(256, matchfuzzy([repeat('a', 256)], repeat('a', 256))[0]->len()) | |
24 | + call assert_equal([], matchfuzzy([repeat('a', 300)], repeat('a', 257))) | |
25 | + | |
26 | + " Tests for match preferences | |
27 | + " preference for camel case match | |
28 | + call assert_equal(['oneTwo', 'onetwo'], ['onetwo', 'oneTwo']->matchfuzzy('onetwo')) | |
29 | + " preference for match after a separator (_ or space) | |
30 | + call assert_equal(['one_two', 'one two', 'onetwo'], ['onetwo', 'one_two', 'one two']->matchfuzzy('onetwo')) | |
31 | + " preference for leading letter match | |
32 | + call assert_equal(['onetwo', 'xonetwo'], ['xonetwo', 'onetwo']->matchfuzzy('onetwo')) | |
33 | + " preference for sequential match | |
34 | + call assert_equal(['onetwo', 'oanbectdweo'], ['oanbectdweo', 'onetwo']->matchfuzzy('onetwo')) | |
35 | + " non-matching leading letter(s) penalty | |
36 | + call assert_equal(['xonetwo', 'xxonetwo'], ['xxonetwo', 'xonetwo']->matchfuzzy('onetwo')) | |
37 | + " total non-matching letter(s) penalty | |
38 | + call assert_equal(['one', 'onex', 'onexx'], ['onexx', 'one', 'onex']->matchfuzzy('one')) | |
39 | + | |
40 | + %bw! | |
41 | + eval ['somebuf', 'anotherone', 'needle', 'yetanotherone']->map({_, v -> bufadd(v) + bufload(v)}) | |
42 | + let l = getbufinfo()->map({_, v -> v.name})->matchfuzzy('ndl') | |
43 | + call assert_equal(1, len(l)) | |
44 | + call assert_match('needle', l[0]) | |
45 | + | |
46 | + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] | |
47 | + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'text_cb' : {v -> v.val}})) | |
48 | + call assert_equal([{'id' : 6, 'val' : 'camera'}], matchfuzzy(l, 'cam', {'key' : 'val'})) | |
49 | + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> v.val}})) | |
50 | + call assert_equal([], matchfuzzy(l, 'day', {'key' : 'val'})) | |
51 | + call assert_fails("let x = matchfuzzy(l, 'cam', 'random')", 'E715:') | |
52 | + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> []}})) | |
53 | + call assert_equal([], matchfuzzy(l, 'day', {'text_cb' : {v -> 1}})) | |
54 | + call assert_fails("let x = matchfuzzy(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') | |
55 | + call assert_equal([], matchfuzzy(l, 'cam')) | |
56 | + call assert_fails("let x = matchfuzzy(l, 'cam', {'text_cb' : []})", 'E921:') | |
57 | + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : []})", 'E730:') | |
58 | + call assert_fails("let x = matchfuzzy(l, 'cam', test_null_dict())", 'E715:') | |
59 | + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : test_null_string()})", 'E475:') | |
60 | + call assert_fails("let x = matchfuzzy(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') | |
61 | + | |
62 | + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] | |
63 | + call assert_fails("let x = matchfuzzy(l, 'foo', {'key' : 'name'})", 'E730:') | |
64 | + | |
65 | + " Test in latin1 encoding | |
66 | + let save_enc = &encoding | |
67 | + set encoding=latin1 | |
68 | + call assert_equal(['abc'], matchfuzzy(['abc'], 'abc')) | |
69 | + let &encoding = save_enc | |
70 | +endfunc | |
71 | + | |
72 | +" Test for the fuzzymatchpos() function | |
73 | +func Test_matchfuzzypos() | |
74 | + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'curl'], 'rl')) | |
75 | + call assert_equal([['curl', 'world'], [[2,3], [2,3]]], matchfuzzypos(['world', 'one', 'curl'], 'rl')) | |
76 | + call assert_equal([['hello', 'hello world hello world'], | |
77 | + \ [[0, 1, 2, 3, 4], [0, 1, 2, 3, 4]]], | |
78 | + \ matchfuzzypos(['hello world hello world', 'hello', 'world'], 'hello')) | |
79 | + call assert_equal([['aaaaaaa'], [[0, 1, 2]]], matchfuzzypos(['aaaaaaa'], 'aaa')) | |
80 | + call assert_equal([[], []], matchfuzzypos(['world', 'curl'], 'ab')) | |
81 | + let x = matchfuzzypos([repeat('a', 256)], repeat('a', 256)) | |
82 | + call assert_equal(range(256), x[1][0]) | |
83 | + call assert_equal([[], []], matchfuzzypos([repeat('a', 300)], repeat('a', 257))) | |
84 | + call assert_equal([[], []], matchfuzzypos([], 'abc')) | |
85 | + | |
86 | + " match in a long string | |
87 | + call assert_equal([[repeat('x', 300) .. 'abc'], [[300, 301, 302]]], | |
88 | + \ matchfuzzypos([repeat('x', 300) .. 'abc'], 'abc')) | |
89 | + | |
90 | + " preference for camel case match | |
91 | + call assert_equal([['xabcxxaBc'], [[6, 7, 8]]], matchfuzzypos(['xabcxxaBc'], 'abc')) | |
92 | + " preference for match after a separator (_ or space) | |
93 | + call assert_equal([['xabx_ab'], [[5, 6]]], matchfuzzypos(['xabx_ab'], 'ab')) | |
94 | + " preference for leading letter match | |
95 | + call assert_equal([['abcxabc'], [[0, 1]]], matchfuzzypos(['abcxabc'], 'ab')) | |
96 | + " preference for sequential match | |
97 | + call assert_equal([['aobncedone'], [[7, 8, 9]]], matchfuzzypos(['aobncedone'], 'one')) | |
98 | + " best recursive match | |
99 | + call assert_equal([['xoone'], [[2, 3, 4]]], matchfuzzypos(['xoone'], 'one')) | |
100 | + | |
101 | + let l = [{'id' : 5, 'val' : 'crayon'}, {'id' : 6, 'val' : 'camera'}] | |
102 | + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], | |
103 | + \ matchfuzzypos(l, 'cam', {'text_cb' : {v -> v.val}})) | |
104 | + call assert_equal([[{'id' : 6, 'val' : 'camera'}], [[0, 1, 2]]], | |
105 | + \ matchfuzzypos(l, 'cam', {'key' : 'val'})) | |
106 | + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> v.val}})) | |
107 | + call assert_equal([[], []], matchfuzzypos(l, 'day', {'key' : 'val'})) | |
108 | + call assert_fails("let x = matchfuzzypos(l, 'cam', 'random')", 'E715:') | |
109 | + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> []}})) | |
110 | + call assert_equal([[], []], matchfuzzypos(l, 'day', {'text_cb' : {v -> 1}})) | |
111 | + call assert_fails("let x = matchfuzzypos(l, 'day', {'text_cb' : {a, b -> 1}})", 'E119:') | |
112 | + call assert_equal([[], []], matchfuzzypos(l, 'cam')) | |
113 | + call assert_fails("let x = matchfuzzypos(l, 'cam', {'text_cb' : []})", 'E921:') | |
114 | + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : []})", 'E730:') | |
115 | + call assert_fails("let x = matchfuzzypos(l, 'cam', test_null_dict())", 'E715:') | |
116 | + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : test_null_string()})", 'E475:') | |
117 | + call assert_fails("let x = matchfuzzypos(l, 'foo', {'text_cb' : test_null_function()})", 'E475:') | |
118 | + | |
119 | + let l = [{'id' : 5, 'name' : 'foo'}, {'id' : 6, 'name' : []}, {'id' : 7}] | |
120 | + call assert_fails("let x = matchfuzzypos(l, 'foo', {'key' : 'name'})", 'E730:') | |
121 | +endfunc | |
122 | + | |
123 | +func Test_matchfuzzy_mbyte() | |
124 | + CheckFeature multi_lang | |
125 | + call assert_equal(['ンヹㄇヺヴ'], matchfuzzy(['ンヹㄇヺヴ'], 'ヹヺ')) | |
126 | + " reverse the order of characters | |
127 | + call assert_equal([], matchfuzzy(['ンヹㄇヺヴ'], 'ヺヹ')) | |
128 | + call assert_equal(['αβΩxxx', 'xαxβxΩx'], | |
129 | + \ matchfuzzy(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) | |
130 | + call assert_equal(['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], | |
131 | + \ matchfuzzy(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) | |
132 | + | |
133 | + " preference for camel case match | |
134 | + call assert_equal(['oneĄwo', 'oneąwo'], | |
135 | + \ ['oneąwo', 'oneĄwo']->matchfuzzy('oneąwo')) | |
136 | + " preference for match after a separator (_ or space) | |
137 | + call assert_equal(['ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ', 'ⅠⅡabㄟㄠ'], | |
138 | + \ ['ⅠⅡabㄟㄠ', 'ⅠⅡa_bㄟㄠ', 'ⅠⅡa bㄟㄠ']->matchfuzzy('ⅠⅡabㄟㄠ')) | |
139 | + " preference for leading letter match | |
140 | + call assert_equal(['ŗŝţũŵż', 'xŗŝţũŵż'], | |
141 | + \ ['xŗŝţũŵż', 'ŗŝţũŵż']->matchfuzzy('ŗŝţũŵż')) | |
142 | + " preference for sequential match | |
143 | + call assert_equal(['ㄞㄡㄤfffifl', 'ㄞaㄡbㄤcffdfiefl'], | |
144 | + \ ['ㄞaㄡbㄤcffdfiefl', 'ㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) | |
145 | + " non-matching leading letter(s) penalty | |
146 | + call assert_equal(['xㄞㄡㄤfffifl', 'xxㄞㄡㄤfffifl'], | |
147 | + \ ['xxㄞㄡㄤfffifl', 'xㄞㄡㄤfffifl']->matchfuzzy('ㄞㄡㄤfffifl')) | |
148 | + " total non-matching letter(s) penalty | |
149 | + call assert_equal(['ŗŝţ', 'ŗŝţx', 'ŗŝţxx'], | |
150 | + \ ['ŗŝţxx', 'ŗŝţ', 'ŗŝţx']->matchfuzzy('ŗŝţ')) | |
151 | +endfunc | |
152 | + | |
153 | +func Test_matchfuzzypos_mbyte() | |
154 | + CheckFeature multi_lang | |
155 | + call assert_equal([['こんにちは世界'], [[0, 1, 2, 3, 4]]], | |
156 | + \ matchfuzzypos(['こんにちは世界'], 'こんにちは')) | |
157 | + call assert_equal([['ンヹㄇヺヴ'], [[1, 3]]], matchfuzzypos(['ンヹㄇヺヴ'], 'ヹヺ')) | |
158 | + " reverse the order of characters | |
159 | + call assert_equal([[], []], matchfuzzypos(['ンヹㄇヺヴ'], 'ヺヹ')) | |
160 | + call assert_equal([['αβΩxxx', 'xαxβxΩx'], [[0, 1, 2], [1, 3, 5]]], | |
161 | + \ matchfuzzypos(['αβΩxxx', 'xαxβxΩx'], 'αβΩ')) | |
162 | + call assert_equal([['ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ', 'πbπ'], | |
163 | + \ [[0, 1], [0, 1], [0, 1], [0, 2]]], | |
164 | + \ matchfuzzypos(['πbπ', 'ππbbππ', 'πππbbbπππ', 'ππππbbbbππππ'], 'ππ')) | |
165 | + call assert_equal([['ααααααα'], [[0, 1, 2]]], | |
166 | + \ matchfuzzypos(['ααααααα'], 'ααα')) | |
167 | + | |
168 | + call assert_equal([[], []], matchfuzzypos(['ンヹㄇ', 'ŗŝţ'], 'fffifl')) | |
169 | + let x = matchfuzzypos([repeat('Ψ', 256)], repeat('Ψ', 256)) | |
170 | + call assert_equal(range(256), x[1][0]) | |
171 | + call assert_equal([[], []], matchfuzzypos([repeat('✓', 300)], repeat('✓', 257))) | |
172 | + | |
173 | + " match in a long string | |
174 | + call assert_equal([[repeat('♪', 300) .. '✗✗✗'], [[300, 301, 302]]], | |
175 | + \ matchfuzzypos([repeat('♪', 300) .. '✗✗✗'], '✗✗✗')) | |
176 | + " preference for camel case match | |
177 | + call assert_equal([['xѳѵҁxxѳѴҁ'], [[6, 7, 8]]], matchfuzzypos(['xѳѵҁxxѳѴҁ'], 'ѳѵҁ')) | |
178 | + " preference for match after a separator (_ or space) | |
179 | + call assert_equal([['xちだx_ちだ'], [[5, 6]]], matchfuzzypos(['xちだx_ちだ'], 'ちだ')) | |
180 | + " preference for leading letter match | |
181 | + call assert_equal([['ѳѵҁxѳѵҁ'], [[0, 1]]], matchfuzzypos(['ѳѵҁxѳѵҁ'], 'ѳѵ')) | |
182 | + " preference for sequential match | |
183 | + call assert_equal([['aンbヹcㄇdンヹㄇ'], [[7, 8, 9]]], matchfuzzypos(['aンbヹcㄇdンヹㄇ'], 'ンヹㄇ')) | |
184 | + " best recursive match | |
185 | + call assert_equal([['xффйд'], [[2, 3, 4]]], matchfuzzypos(['xффйд'], 'фйд')) | |
186 | +endfunc | |
187 | + | |
188 | +" vim: shiftwidth=2 sts=2 expandtab |
@@ -751,6 +751,8 @@ | ||
751 | 751 | static int included_patches[] = |
752 | 752 | { /* Add new patch number below this line */ |
753 | 753 | /**/ |
754 | + 1726, | |
755 | +/**/ | |
754 | 756 | 1725, |
755 | 757 | /**/ |
756 | 758 | 1724, |