OSDN > Developer > dhrname > Chamber > star > Commit

star
Fork

(Original repository, No fork origin)

R/O
HTTP
SSH
HTTPS

Commit

Commit MetaInfo

Revision	49e6db19fe256d3f6308693d3cc1700914055b24 (tree)
Time	2017-03-07 23:28:54
Author	dhrname <dhrname@user...>
Committer	dhrname

Log Message

Modify the hiragana2token function

Change Summary

modified: source_code/shadowstar.c (diff)
modified: source_code/shadowstar.h (diff)
modified: source_code/startest.c (diff)

Incremental Difference

--- a/source_code/shadowstar.c
+++ b/source_code/shadowstar.c
@@ -92,24 +92,37 @@ ST_UTF8_Char_Code ST_Hiragana_Code = {
      u8"っ", u8"ゃ", u8"ゅ", u8"ょ",
      u8"ぱ", u8"ぴ", u8"ぷ", u8"ぺ", u8"ぽ",
      u8"ゐ", u8"ゑ", u8"ゔ", u8"ゕ", u8"ゖ",
-     u8"゛", u8"゜", u8"ゝ", u8"ゞ"
+     u8"゛", u8"゜", u8"ゝ", u8"ゞ",
+     u8"、", u8"。"
     },
-    89
+    91
 };
 
 /*hiragana2token 関数
- * ひらがなをトークン化する。
+ * ひらがなや句読点をトークン化して、スキップする。
  * posはtextの現在位置。listposはarrayの現在位置*/
-static inline uint_fast32_t hiragana2token(ST_Text_Code tc, uint_fast32_t listpos)
+static inline uint_fast32_t hiragana2token(ST_Text_Code tc, uint_fast32_t listpos, size_t length)
 {
         uint_fast32_t local_length = 0;
         uint_fast32_t cache = 0;
-        for (uint_fast16_t i = 0; i < ST_Hiragana_Code.length; i++)
+        /*resultは文字列が一致したかどうかのチェック。0ならば、不一致。それ以外の数値は一致したとみなす*/
+        size_t result = 0;
+        while (local_length <= length)
         {
+            result = 0;
             cache = tc.pos;
             tc.pos += local_length;
-            local_length += text2token(ST_HIRAGANA_TOKEN, tc, listpos, ST_Hiragana_Code.map[i], ST_Hiragana_Code.cache_length[i]);
+            for (uint_fast16_t i = 0; i < ST_Hiragana_Code.length; i++)
+            {
+                result += text2token(ST_FUNCTION_APPLY_TOKEN, tc, listpos, ST_Hiragana_Code.map[i], ST_Hiragana_Code.cache_length[i]);
+            }
+            local_length += result;
             tc.pos = cache;
+            if (!result)
+            {
+                /*一致する文字列がなくなるまで続ける*/
+                return local_length;
+            }
         }
         return local_length;
 }
@@ -148,13 +161,18 @@ uint16_t *ST_tokenize(const int_least8_t *text, ST_Token_Mode *array)
     ST_Text_Code tc = {text, array, pos};
     for (uint_fast32_t i = 0; i < length; i++)
     {
-        /*文字がマッチングした分だけ、posメンバが増える*/
+        /*文字がマッチングした分だけ、textの現在位置を示すposメンバが増える*/
         tc.pos += text2token(ST_KAGIKAKKO_BEGIN_TOKEN, tc, i, u8"「", strlength[0])
             + text2token(ST_KAGIKAKKO_END_TOKEN, tc, i, u8"」", strlength[1])
             + text2token(ST_PARENTHESES_BEGIN_TOKEN, tc, i, u8"（", strlength[2])
             + text2token(ST_PARENTHESES_END_TOKEN, tc, i, u8"）", strlength[3])
-            + hiragana2token(tc, i)
+            + hiragana2token(tc, i, length)
             + text2token(ST_VARIABLE_TOKEN, tc, i, u8"愛", strlen(u8"愛"));
+        if (tc.pos >= length)
+        {
+            /*現在位置がテキストの容量を超えた場合*/
+            return array;
+        }
     }
     return array;
 }

--- a/source_code/shadowstar.h
+++ b/source_code/shadowstar.h
@@ -55,7 +55,7 @@ void eprint_log (uint8_t*);
 /*トークン化したときの識別用のマジックナンバー*/
 typedef enum {
   ST_UNKNOWN_TOKEN = 0,
-  ST_HIRAGANA_TOKEN,
+  ST_FUNCTION_APPLY_TOKEN,
   ST_VARIABLE_TOKEN,
   ST_PARENTHESES_BEGIN_TOKEN,
   ST_PARENTHESES_END_TOKEN,
@@ -67,7 +67,7 @@ typedef enum {
  * 文字列をトークン化処理して、入力した配列にトークン番号を割り当てる。入力値はトークン化したい文章と配列*/
 uint16_t* ST_tokenize(const int_least8_t*, ST_Token_Mode*);
 
-#define ST_CHAR_CODE_MAP_MAX 90
+#define ST_CHAR_CODE_MAP_MAX 92
 
 /*UTF8用のマップ
  * このマップをもとに、文字列のパターンを照合する*/

--- a/source_code/startest.c
+++ b/source_code/startest.c
@@ -121,11 +121,25 @@ int main(int argc, char **argv)
     tokens1[3] = 0;
     ST_tokenize(u8"ああああ", tokens1);
     assert(tokens1[0] == 1);
-    assert(tokens1[1] == 1);
-    assert(tokens1[2] == 1);
-    assert(tokens1[3] == 1);
+    assert(tokens1[1] == 0);
+    assert(tokens1[2] == 0);
+    assert(tokens1[3] == 0);
+    init_token(tokens1);
+    tokens1[3] = 0;
+    ST_tokenize(u8"、。。、", tokens1);
+    assert(tokens1[0] == 1);
+    assert(tokens1[1] == 0);
+    assert(tokens1[2] == 0);
+    assert(tokens1[3] == 0);
+    init_token(tokens1);
+    tokens1[3] = 0;
+    ST_tokenize(u8"、あ。愛", tokens1);
+    assert(tokens1[0] == 1);
+    assert(tokens1[1] == 2);
+    assert(tokens1[2] == 0);
+    assert(tokens1[3] == 0);
     
-    ST_Token_Mode tokenmode = ST_HIRAGANA_TOKEN;
+    ST_Token_Mode tokenmode = ST_FUNCTION_APPLY_TOKEN;
     assert(tokenmode == 1);
     tokenmode = ST_UNKNOWN_TOKEN;
     assert(tokenmode == 0);

star Fork

Commit

Tags

Frequently used words (click to add to your profile)

Commit MetaInfo

Log Message

Change Summary

Incremental Difference

star
Fork