[kazehakase-svn] [3431] *Generalize search expression from tool-bar

Back to archive index

svnno****@sourc***** svnno****@sourc*****
Sun Mar 2 03:51:37 JST 2008


Revision: 3431
          http://svn.sourceforge.jp/cgi-bin/viewcvs.cgi?root=kazehakase&view=rev&rev=3431
Author:   pal_gene
Date:     2008-03-02 03:51:36 +0900 (Sun, 02 Mar 2008)

Log Message:
-----------
*Generalize search expression from tool-bar
*Convert zenkaku (full-width) white space to plain spaces (using UTF-8 compatibility normalization with composition)
*Support 'site:' expression
    usage:
    * Show only the google.com URIs from history that contain the word "kazehakase"
        history-search:kazehakase site:google.com
    * Show the URIs from history that contain the word "kazehakase", excluding google.com URIs
        history-search:kazehakase -site:google.com

Modified Paths:
--------------
    kazehakase/trunk/module/search/kz-hyper-estraier-search.c
    kazehakase/trunk/module/search/kz-search-common.h

Modified: kazehakase/trunk/module/search/kz-hyper-estraier-search.c
===================================================================
--- kazehakase/trunk/module/search/kz-hyper-estraier-search.c	2008-03-01 10:12:11 UTC (rev 3430)
+++ kazehakase/trunk/module/search/kz-hyper-estraier-search.c	2008-03-01 18:51:36 UTC (rev 3431)
@@ -258,15 +258,16 @@
 		G_OBJECT_CLASS(parent_class)->dispose(object);
 }
 
+
 static gchar *
 create_search_result_html (KzSearch *search, const gchar *text)
 {
 	ESTCOND *cond;
 	CBLIST *highlights;
 	int *results, n_results, i;
-	gchar *except_word, *tmp;
+	gchar *except_word, *tmp, *utf8;
 	gchar **texts;
-	GString *html, *phrase, *desc_str;
+	GString *html, *phrase, *attr_uri_phrase, *desc_str;
 	gint num_summary = 128, max_results = 20, half_of_summary;
 	KzHyperEstraierSearch *he_search;
 
@@ -279,54 +280,88 @@
 	cond = est_cond_new();
 	highlights = cblistopen();
 	
-	tmp = g_utf8_casefold(text, -1);
-	texts = g_strsplit(tmp, " ", -1);
+	/* convert from kz_conf except keyword */
+	phrase = g_string_new(text);
+	except_word = KZ_CONF_GET_STR("History", "except_keyword");
+	if (except_word && *except_word)
+	{
+		texts = g_strsplit(except_word, ",", -1);
+		g_free(except_word);
+		tmp = g_strjoinv(" -", texts);
+		g_strfreev(texts);
+		g_string_append(phrase, " -");
+		g_string_append(phrase, tmp);
+		g_free(tmp);
+	}
+	utf8 = g_locale_to_utf8(phrase->str, -1, NULL, NULL, NULL);
+	g_string_free(phrase, TRUE);
+	tmp = g_utf8_normalize(utf8, -1, G_NORMALIZE_ALL_COMPOSE);
+	g_free(utf8);
+	utf8 = g_utf8_strdown(tmp, -1);
 	g_free(tmp);
+	
+	/* start word split */
+	texts = g_strsplit(utf8, " ", -1);
+	g_free(utf8);
 	phrase = g_string_sized_new(0);
 
 	for (i = 0; texts[i]; i++)
 	{
-		if(*texts[i] == '-')
+		tmp = texts[i];
+		guint flag = 0;
+		gint cond_i;
+		if(!tmp || !*tmp || g_unichar_isspace(*tmp)) continue;
+		/* extract supported condition [ALL KZ_HISTORY_SEARCH CODE] */
+		for(cond_i = 0; cond_i < KZ_SEARCH_FLAG_SIZE; cond_i++)
 		{
-			g_string_append(phrase, " " ESTOPDIFF " ");
-			g_string_append(phrase, texts[i]+1);
+			if(g_str_has_prefix(tmp, KZ_SEARCH_FLAGS[cond_i].exp))
+			{
+				flag |= KZ_SEARCH_FLAGS[cond_i].mask;
+				tmp += strlen(KZ_SEARCH_FLAGS[cond_i].exp);
+			}
 		}
-		else{
-			cblistpush(highlights, texts[i], -1);
-			g_string_append(phrase, " " ESTOPISECT " ");
-			g_string_append(phrase, texts[i]);
+		
+		/* write out for search engine expression [ENGINE SPECIFIC CODE] */
+		switch(flag & KZ_SEARCH_FLAG_GROUP_OPTION) {
+		case KZ_SEARCH_FLAG_MASK_SITE:
+			attr_uri_phrase = g_string_sized_new(0);
+			g_string_printf(attr_uri_phrase,
+					"%s %s %s",
+					ESTDATTRURI,
+					(flag & KZ_SEARCH_FLAG_MASK_NOT)?"!ISTRINC":"ISTRINC",
+					tmp);
+			est_cond_add_attr(cond, attr_uri_phrase->str);
+			g_string_free(attr_uri_phrase, TRUE);
+			break;
+		default:
+			if(flag & KZ_SEARCH_FLAG_MASK_NOT)
+			{
+				g_string_append(phrase, " " ESTOPDIFF " ");
+			}
+			else
+			{
+				g_string_append(phrase, " " ESTOPISECT " ");
+				cblistpush(highlights, tmp, -1);
+			}
+			g_string_append(phrase, tmp);
 		}
 	}
 	g_strfreev(texts);
 
-	except_word = KZ_CONF_GET_STR("History", "except_keyword");
-	if (except_word && *except_word)
-	{
-		tmp = g_utf8_casefold(except_word, -1);
-		g_free(except_word);
-		texts = g_strsplit(tmp, ",", -1);
-		g_free(tmp);
-		i = 0;
-		while (texts[i])
-		{
-			g_string_append(phrase, " " ESTOPDIFF " ");
-			g_string_append(phrase, texts[i]);
-			i++;
-		}
-		g_strfreev(texts);
-	}
-	
 	est_cond_set_phrase(cond, phrase->str);	
 	g_string_free(phrase, TRUE);
 
+	/* start actual search */
 	KZ_CONF_GET("History", "num_summary", num_summary, INT);
 	KZ_CONF_GET("History", "max_results", max_results, INT);
 	half_of_summary = num_summary / 2;
 
 	results = est_mtdb_search(he_search->db, cond, &n_results, NULL);
 
-	desc_str = g_string_sized_new(num_summary*2 - 1);		/* almost typical in num_summary*2 */
-	html = g_string_sized_new((num_summary*2+512)*max_results);	/* typical in num_summary*2*max_results */
+	/* almost in typical num_summary*2 */
+	desc_str = g_string_sized_new(num_summary*2 - 1);
+	/* in typical (num_summary*2+html_tags)*max_results */
+	html = g_string_sized_new((num_summary*2+512)*max_results);
 
 	g_string_append(html, DTD"\n");
 	g_string_append(html, "<html>\n");

Modified: kazehakase/trunk/module/search/kz-search-common.h
===================================================================
--- kazehakase/trunk/module/search/kz-search-common.h	2008-03-01 10:12:11 UTC (rev 3430)
+++ kazehakase/trunk/module/search/kz-search-common.h	2008-03-01 18:51:36 UTC (rev 3431)
@@ -21,6 +21,8 @@
 #ifndef __KZ_SEARCH_COMMON_H__
 #define __KZ_SEARCH_COMMON_H__
 
+#include <glib.h>
+
 G_BEGIN_DECLS
 
 #define DTD   "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML 4.01//EN\" \"http://www.w3.org/TR/html4/strict.dtd\">"
@@ -47,4 +49,30 @@
 
 G_END_DECLS
 
+
+typedef struct {
+    unsigned int mask;
+    char *exp;
+} KZ_SEARCH_FLAG;
+
+#define KZ_SEARCH_FLAG_GROUP_OPERATOR	0x0000000f
+#define KZ_SEARCH_FLAG_MASK_NOT		0x00000001
+#define KZ_SEARCH_FLAG_GROUP_OPTION	0x000000f0		/* exclusive */
+#define KZ_SEARCH_FLAG_MASK_SITE	0x00000010
+
+//NOTE: synchronize FLAG_NUMBER and FLAGS order
+enum KZ_SEARCH_FLAG_NUMBER {
+	KZ_SEARCH_FLAG_NOT,
+	KZ_SEARCH_FLAG_SITE
+};
+
+KZ_SEARCH_FLAG KZ_SEARCH_FLAGS[] = {
+    { KZ_SEARCH_FLAG_MASK_NOT, "-"},
+    { KZ_SEARCH_FLAG_MASK_SITE, "site:"}
+};
+
+#define KZ_SEARCH_FLAG_SIZE (sizeof(KZ_SEARCH_FLAGS)/sizeof(KZ_SEARCH_FLAG))
+
+
+
 #endif /* __KZ_SEARCH_COMMON_H__ */




More information about the Kazehakase-cvs mailing list
Back to archive index