篠田です。
久しぶりにソフト製作欲が湧いてきたので、保留状態だったメール検索のための
Groonga テーブル設計を考えていたのですが、
文書本体とメールのテーブルを分けておいたほうがあとあと都合がいいと思い、
以下のようにしてみました。
ところが、期待と違う検索結果が出てきてしまい、何らかの制限事項に
引っかかっているのか、それとも使い方の問題なのかが分からず悩んでいます。
お忙しいところ申し訳ありませんが、ご教示お願いできませんでしょうか?
> groonga.exe --version
Groonga 12.0.0 [Windows,AMD64,utf8,match-escalation-threshold=0,nfkc,mecab,message-pack,mruby,onigmo,lz4,zstandard,rapidjson,apache-arrow,xxhash]
table_create Mails TABLE_HASH_KEY ShortText
table_create Contents TABLE_HASH_KEY UInt32
column_create Contents ctype COLUMN_SCALAR ShortText
column_create Contents filename COLUMN_SCALAR ShortText
column_create Contents body COLUMN_SCALAR LongText
column_create Contents msgid COLUMN_SCALAR Mails
column_create Mails subject COLUMN_SCALAR ShortText
column_create Mails contents COLUMN_VECTOR Contents
load --table Contents
[
{"_key":1, "ctype":"text", "body":"eee", "msgid": "<aaa @ bbb>"},
{"_key":2, "ctype":"word", "filename":"word.doc", "body":"ワード文書", "msgid": "<aaa @ bbb>"},
]
load --table Mails
[
{"_key":"<aaa @ bbb>", "subject":"subject", "contents":[1,2]},
]
select --table Mails --match_columns "contents.body" --query "eee"
--> [[0,1645746366.05,0.04999995231628418],[[[1],[["_id","UInt32"],["_key","ShortText"],["contents","Contents"],["subject","ShortText"]],[1,"<aaa @ bbb>",[1,2],"subject"]]]]
↑期待通り
select --table Mails --match_columns "subject || contents.body" --query "eee"
--> [[0,1645746366.101,0.0],[[[0],[["_id","UInt32"],["_key","ShortText"],["contents","Contents"],["subject","ShortText"]]]]]
↑なぜ上と同じ結果にならない?
select --table Mails --match_columns "subject" --query "subject"
--> [[0,1645745494.614,0.0009999275207519531],[[[1],[["_id","UInt32"],["_key","ShortText"],["contents","Contents"],["subject","ShortText"]],[1,"<aaa @ bbb>",[1,2],"subject"]]]]
↑期待通り
select --table Mails --match_columns "subject || contents.body" --query "subject"
--> [[0,1645745494.616,0.0],[[[0],[["_id","UInt32"],["_key","ShortText"],["contents","Contents"],["subject","ShortText"]]]]]
↑なぜ上と同じ結果にならない?
select --table Mails --match_columns "contents.body" --query "ワード"
--> [[0,1645746366.104,0.0],[[[0],[["_id","UInt32"],["_key","ShortText"],["contents","Contents"],["subject","ShortText"]]]]]
↑なぜ検索できない?(逆に言えば、最初のselectはなぜ検索できる?)
select --table Contents --match_columns "body" --query "ワード"
--> [[0,1645746366.105,0.0],[[[1],[["_id","UInt32"],["_key","UInt32"],["body","LongText"],["ctype","ShortText"],["filename","ShortText"],["msgid","Mails"]],[2,2,"ワード文書","word","word.doc","<aaa @ bbb>"]]]]
↑Contentsテーブルからだと検索できるが、
できればMailsテーブルからsubjectも含めて一括で検索したい
--
篠田 敦
shino****@jcom*****