Yasuhiro Horimoto 2018-12-28 00:28:57 +0900 (Fri, 28 Dec 2018) Revision: 530f9dc98199a31b538fb6757dcd86dbc7e3965b https://github.com/groonga/groonga/commit/530f9dc98199a31b538fb6757dcd86dbc7e3965b Message: doc: separate TokenFilterStem and TokenFilterStopWord into other pages Modified files: doc/source/reference/token_filters.rst Modified: doc/source/reference/token_filters.rst (+4 -77) =================================================================== --- doc/source/reference/token_filters.rst 2018-12-28 00:28:38 +0900 (d29f720a1) +++ doc/source/reference/token_filters.rst 2018-12-28 00:28:57 +0900 (12c4f7b2d) @@ -35,84 +35,11 @@ Available token filters Here is the list of available token filters: -* ``TokenFilterStopWord`` -* ``TokenFilterStem`` +.. toctree:: + :maxdepth: 1 + :glob: -.. _token-filter-stop-word: - -``TokenFilterStopWord`` -^^^^^^^^^^^^^^^^^^^^^^^ - -``TokenFilterStopWord`` removes stop words from tokenized token -in searching the documents. - -``TokenFilterStopWord`` can specify stop word after adding the -documents because it removes token in searching the documents. - -The stop word is specified ``is_stop_word`` column on lexicon table. - -Here is an example that uses ``TokenFilterStopWord`` token filter: - -.. groonga-command -.. database: token_filters_stop_word -.. include:: ../example/reference/token_filters/stop_word.log -.. plugin_register token_filters/stop_word -.. table_create Memos TABLE_NO_KEY -.. column_create Memos content COLUMN_SCALAR ShortText -.. table_create Terms TABLE_PAT_KEY ShortText \ -.. --default_tokenizer TokenBigram \ -.. --normalizer NormalizerAuto \ -.. --token_filters TokenFilterStopWord -.. column_create Terms memos_content COLUMN_INDEX|WITH_POSITION Memos content -.. column_create Terms is_stop_word COLUMN_SCALAR Bool -.. load --table Terms -.. [ -.. {"_key": "and", "is_stop_word": true} -.. ] -.. load --table Memos -.. [ -.. {"content": "Hello"}, -.. {"content": "Hello and Good-bye"}, -.. {"content": "Good-bye"} -.. ] -.. select Memos --match_columns content --query "Hello and" - -``and`` token is marked as stop word in ``Terms`` table. - -``"Hello"`` that doesn't have ``and`` in content is matched. Because -``and`` is a stop word and ``and`` is removed from query. - -.. _token-filter-stem: - -``TokenFilterStem`` -^^^^^^^^^^^^^^^^^^^ - -``TokenFilterStem`` stems tokenized token. - -Here is an example that uses ``TokenFilterStem`` token filter: - -.. groonga-command -.. database: token_filters_stem -.. include:: ../example/reference/token_filters/stem.log -.. plugin_register token_filters/stem -.. table_create Memos TABLE_NO_KEY -.. column_create Memos content COLUMN_SCALAR ShortText -.. table_create Terms TABLE_PAT_KEY ShortText \ -.. --default_tokenizer TokenBigram \ -.. --normalizer NormalizerAuto \ -.. --token_filters TokenFilterStem -.. column_create Terms memos_content COLUMN_INDEX|WITH_POSITION Memos content -.. load --table Memos -.. [ -.. {"content": "I develop Groonga"}, -.. {"content": "I'm developing Groonga"}, -.. {"content": "I developed Groonga"} -.. ] -.. select Memos --match_columns content --query "develops" - -All of ``develop``, ``developing``, ``developed`` and ``develops`` -tokens are stemmed as ``develop``. So we can find ``develop``, -``developing`` and ``developed`` by ``develops`` query. + token_filters/* See also -------- -------------- next part -------------- An HTML attachment was scrubbed... URL: <https://lists.osdn.me/mailman/archives/groonga-commit/attachments/20181228/834b374f/attachment-0001.html>