Kouhei Sutou
null+****@clear*****
Mon Feb 11 13:13:43 JST 2013
Kouhei Sutou 2013-02-11 13:13:43 +0900 (Mon, 11 Feb 2013) New Revision: 88a576fec066382d3efc2552fede2a82dcaf3a48 https://github.com/groonga/groonga-normalizer-mysql/commit/88a576fec066382d3efc2552fede2a82dcaf3a48 Log: plane -> page Because "plane" confuses with "plane" in Unicode. "plane" in strings/ctype-utf8.c is not "plane" in Unicode. So we don't use "plane". Modified files: normalizers/mysql.c normalizers/mysql_general_ci_table.h tool/dump_difference_utf8.rb tool/generate_utf8_table.rb tool/parser.rb Renamed files: test/suite/general_ci/page_00.expected (from test/suite/general_ci/plane00.expected) test/suite/general_ci/page_00.test (from test/suite/general_ci/plane00.test) Modified: normalizers/mysql.c (+10 -10) =================================================================== --- normalizers/mysql.c 2013-02-11 13:09:07 +0900 (3e38fe0) +++ normalizers/mysql.c 2013-02-11 13:13:43 +0900 (a31611b) @@ -75,30 +75,30 @@ unichar_to_utf8(uint32_t unichar, char *output) static inline void decompose_character(const char *rest, int character_length, - int *plane, uint32_t *low_code) + int *page, uint32_t *low_code) { switch (character_length) { case 1 : - *plane = 0x00; + *page = 0x00; *low_code = rest[0] & 0x7f; break; case 2 : - *plane = rest[0] & 0x1c; + *page = rest[0] & 0x1c; *low_code = ((rest[0] & 0x03) << 6) + (rest[1] & 0x3f); break; case 3 : - *plane = ((rest[0] & 0x0f) << 4) + ((rest[1] & 0x3c)); + *page = ((rest[0] & 0x0f) << 4) + ((rest[1] & 0x3c)); *low_code = ((rest[1] & 0x03) << 6) + (rest[2] & 0x3f); break; case 4 : - *plane = + *page = ((rest[0] & 0x07) << 10) + ((rest[1] & 0x3f) << 4) + ((rest[2]) & 0x3c); *low_code = ((rest[1] & 0x03) << 6) + (rest[2] & 0x3f); break; default : - *plane = -1; + *page = -1; *low_code = 0x00; break; } @@ -136,7 +136,7 @@ normalize(grn_ctx *ctx, grn_obj *string, uint32_t **normalize_table) rest_length = original_length_in_bytes; while (rest_length > 0) { int character_length; - int plane; + int page; uint32_t low_code; 
character_length = grn_plugin_charlen(ctx, rest, rest_length, encoding); @@ -144,16 +144,16 @@ normalize(grn_ctx *ctx, grn_obj *string, uint32_t **normalize_table) break; } - decompose_character(rest, character_length, &plane, &low_code); + decompose_character(rest, character_length, &page, &low_code); if (remove_blank_p && character_length == 1 && rest[0] == ' ') { if (current_type > types) { current_type[-1] |= GRN_CHAR_BLANK; } } else { - if ((0x00 <= plane && plane <= 0xff) && normalize_table[plane]) { + if ((0x00 <= page && page <= 0xff) && normalize_table[page]) { uint32_t normalized_code; unsigned int n_bytes; - normalized_code = normalize_table[plane][low_code]; + normalized_code = normalize_table[page][low_code]; n_bytes = unichar_to_utf8(normalized_code, normalized + normalized_length_in_bytes); normalized_length_in_bytes += n_bytes; Modified: normalizers/mysql_general_ci_table.h (+139 -139) =================================================================== --- normalizers/mysql_general_ci_table.h 2013-02-11 13:09:07 +0900 (e1e0c50) +++ normalizers/mysql_general_ci_table.h 2013-02-11 13:13:43 +0900 (15ed558) @@ -46,7 +46,7 @@ #include <stdint.h> -static uint32_t general_ci_plane_00[] = { +static uint32_t general_ci_page_00[] = { 0x00000, 0x00001, 0x00002, 0x00003, 0x00004, 0x00005, 0x00006, 0x00007, 0x00008, 0x00009, 0x0000a, 0x0000b, 0x0000c, 0x0000d, 0x0000e, 0x0000f, 0x00010, 0x00011, 0x00012, 0x00013, 0x00014, 0x00015, 0x00016, 0x00017, @@ -81,7 +81,7 @@ static uint32_t general_ci_plane_00[] = { 0x000d8, 0x00055, 0x00055, 0x00055, 0x00055, 0x00059, 0x000de, 0x00059 }; -static uint32_t general_ci_plane_01[] = { +static uint32_t general_ci_page_01[] = { 0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x00041, 0x00043, 0x00043, 0x00043, 0x00043, 0x00043, 0x00043, 0x00043, 0x00043, 0x00044, 0x00044, 0x00110, 0x00110, 0x00045, 0x00045, 0x00045, 0x00045, 0x00045, 0x00045, @@ -116,7 +116,7 @@ static uint32_t general_ci_plane_01[] = { 0x0004e, 0x0004e, 0x00041, 
0x00041, 0x000c6, 0x000c6, 0x000d8, 0x000d8 }; -static uint32_t general_ci_plane_02[] = { +static uint32_t general_ci_page_02[] = { 0x00041, 0x00041, 0x00041, 0x00041, 0x00045, 0x00045, 0x00045, 0x00045, 0x00049, 0x00049, 0x00049, 0x00049, 0x0004f, 0x0004f, 0x0004f, 0x0004f, 0x00052, 0x00052, 0x00052, 0x00052, 0x00055, 0x00055, 0x00055, 0x00055, @@ -151,7 +151,7 @@ static uint32_t general_ci_plane_02[] = { 0x002f8, 0x002f9, 0x002fa, 0x002fb, 0x002fc, 0x002fd, 0x002fe, 0x002ff }; -static uint32_t general_ci_plane_03[] = { +static uint32_t general_ci_page_03[] = { 0x00300, 0x00301, 0x00302, 0x00303, 0x00304, 0x00305, 0x00306, 0x00307, 0x00308, 0x00309, 0x0030a, 0x0030b, 0x0030c, 0x0030d, 0x0030e, 0x0030f, 0x00310, 0x00311, 0x00312, 0x00313, 0x00314, 0x00315, 0x00316, 0x00317, @@ -186,7 +186,7 @@ static uint32_t general_ci_plane_03[] = { 0x003f8, 0x003f9, 0x003fa, 0x003fb, 0x003fc, 0x003fd, 0x003fe, 0x003ff }; -static uint32_t general_ci_plane_04[] = { +static uint32_t general_ci_page_04[] = { 0x00415, 0x00415, 0x00402, 0x00413, 0x00404, 0x00405, 0x00406, 0x00406, 0x00408, 0x00409, 0x0040a, 0x0040b, 0x0041a, 0x00418, 0x00423, 0x0040f, 0x00410, 0x00411, 0x00412, 0x00413, 0x00414, 0x00415, 0x00416, 0x00417, @@ -221,7 +221,7 @@ static uint32_t general_ci_plane_04[] = { 0x0042b, 0x0042b, 0x004fa, 0x004fb, 0x004fc, 0x004fd, 0x004fe, 0x004ff }; -static uint32_t general_ci_plane_05[] = { +static uint32_t general_ci_page_05[] = { 0x00500, 0x00501, 0x00502, 0x00503, 0x00504, 0x00505, 0x00506, 0x00507, 0x00508, 0x00509, 0x0050a, 0x0050b, 0x0050c, 0x0050d, 0x0050e, 0x0050f, 0x00510, 0x00511, 0x00512, 0x00513, 0x00514, 0x00515, 0x00516, 0x00517, @@ -256,7 +256,7 @@ static uint32_t general_ci_plane_05[] = { 0x005f8, 0x005f9, 0x005fa, 0x005fb, 0x005fc, 0x005fd, 0x005fe, 0x005ff }; -static uint32_t general_ci_plane_1e[] = { +static uint32_t general_ci_page_1e[] = { 0x00041, 0x00041, 0x00042, 0x00042, 0x00042, 0x00042, 0x00042, 0x00042, 0x00043, 0x00043, 0x00044, 0x00044, 0x00044, 
0x00044, 0x00044, 0x00044, 0x00044, 0x00044, 0x00044, 0x00044, 0x00045, 0x00045, 0x00045, 0x00045, @@ -291,7 +291,7 @@ static uint32_t general_ci_plane_1e[] = { 0x00059, 0x00059, 0x01efa, 0x01efb, 0x01efc, 0x01efd, 0x01efe, 0x01eff }; -static uint32_t general_ci_plane_1f[] = { +static uint32_t general_ci_page_1f[] = { 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00391, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x00395, 0x01f16, 0x01f17, @@ -326,7 +326,7 @@ static uint32_t general_ci_plane_1f[] = { 0x0039f, 0x01ff9, 0x003a9, 0x01ffb, 0x003a9, 0x01ffd, 0x01ffe, 0x01fff }; -static uint32_t general_ci_plane_21[] = { +static uint32_t general_ci_page_21[] = { 0x02100, 0x02101, 0x02102, 0x02103, 0x02104, 0x02105, 0x02106, 0x02107, 0x02108, 0x02109, 0x0210a, 0x0210b, 0x0210c, 0x0210d, 0x0210e, 0x0210f, 0x02110, 0x02111, 0x02112, 0x02113, 0x02114, 0x02115, 0x02116, 0x02117, @@ -361,7 +361,7 @@ static uint32_t general_ci_plane_21[] = { 0x021f8, 0x021f9, 0x021fa, 0x021fb, 0x021fc, 0x021fd, 0x021fe, 0x021ff }; -static uint32_t general_ci_plane_24[] = { +static uint32_t general_ci_page_24[] = { 0x02400, 0x02401, 0x02402, 0x02403, 0x02404, 0x02405, 0x02406, 0x02407, 0x02408, 0x02409, 0x0240a, 0x0240b, 0x0240c, 0x0240d, 0x0240e, 0x0240f, 0x02410, 0x02411, 0x02412, 0x02413, 0x02414, 0x02415, 0x02416, 0x02417, @@ -396,7 +396,7 @@ static uint32_t general_ci_plane_24[] = { 0x024f8, 0x024f9, 0x024fa, 0x024fb, 0x024fc, 0x024fd, 0x024fe, 0x024ff }; -static uint32_t general_ci_plane_ff[] = { +static uint32_t general_ci_page_ff[] = { 0x0ff00, 0x0ff01, 0x0ff02, 0x0ff03, 0x0ff04, 0x0ff05, 0x0ff06, 0x0ff07, 0x0ff08, 0x0ff09, 0x0ff0a, 0x0ff0b, 0x0ff0c, 0x0ff0d, 0x0ff0e, 0x0ff0f, 0x0ff10, 0x0ff11, 0x0ff12, 0x0ff13, 0x0ff14, 0x0ff15, 0x0ff16, 0x0ff17, @@ -432,134 +432,134 @@ static uint32_t general_ci_plane_ff[] = { }; static uint32_t *general_ci_table[256] = { - general_ci_plane_00, 
general_ci_plane_01, - general_ci_plane_02, general_ci_plane_03, - general_ci_plane_04, general_ci_plane_05, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - NULL, NULL, - general_ci_plane_1e, general_ci_plane_1f, - NULL, general_ci_plane_21, - NULL, NULL, - general_ci_planegeneral_ci_plane_ff + general_ci_page_00, general_ci_page_01, + general_ci_page_02, general_ci_page_03, + general_ci_page_04, general_ci_page_05, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + NULL, NULL, + general_ci_page_1e, general_ci_page_1f, + NULL, general_ci_page_21, + NULL, NULL, + general_ci_pagegeneral_ci_page_ff }; #endif Renamed: test/suite/general_ci/page_00.expected (+0 -0) 100% =================================================================== Renamed: test/suite/general_ci/page_00.test (+0 -0) 100% =================================================================== Modified: tool/dump_difference_utf8.rb (+1 -1) =================================================================== --- tool/dump_difference_utf8.rb 2013-02-11 13:09:07 +0900 (5af6bb2) +++ tool/dump_difference_utf8.rb 2013-02-11 13:13:43 +0900 (8cbfe96) @@ -27,7 +27,7 @@ parser.parse(ARGF) n_differences = 0 n_expanded_sort_characters = 0 -parser.sorted_planes.each do |plane, characters| +parser.sorted_pages.each do |page, characters| characters.each do |character| base = character[:base] upper = character[:upper] Modified: tool/generate_utf8_table.rb (+24 -24) =================================================================== --- tool/generate_utf8_table.rb 2013-02-11 13:09:07 +0900 (5a605f1) +++ tool/generate_utf8_table.rb 2013-02-11 13:13:43 +0900 (dfedd94) @@ -31,17 +31,17 @@ File.open(ctype_utf8_c_path) do |ctype_utf8_c| parser.parse(ctype_utf8_c) end -target_planes = {} -parser.sorted_planes.each do 
|plane, characters| +target_pages = {} +parser.sorted_pages.each do |page, characters| characters.each do |character| base = character[:base] upper = character[:upper] lower = character[:lower] sort = character[:sort] next if base == sort - target_planes[plane] ||= [nil] * 256 + target_pages[page] ||= [nil] * 256 low_code = Unicode.from_utf8(base) & 0xff - target_planes[plane][low_code] = Unicode.from_utf8(sort) + target_pages[page][low_code] = Unicode.from_utf8(sort) end end @@ -97,48 +97,48 @@ puts(<<-HEADER) #include <stdint.h> HEADER -def plane_name(plane) - "general_ci_plane_%02x" % plane +def page_name(page) + "general_ci_page_%02x" % page end -target_planes.each do |plane, characters| - puts(<<-PLANE_HEADER) +target_pages.each do |page, characters| + puts(<<-PAGE_HEADER) -static uint32_t #{plane_name(plane)}[] = { -PLANE_HEADER +static uint32_t #{page_name(page)}[] = { +PAGE_HEADER lines = characters.each_with_index.each_slice(8).collect do |characters_group| formatted_code_points = characters_group.collect do |normalized, low_code| - normalized ||= (plane << 8) + low_code + normalized ||= (page << 8) + low_code "0x%05x" % normalized end " " + formatted_code_points.join(", ") end puts(lines.join(",\n")) - puts(<<-PLANE_FOOTER) + puts(<<-PAGE_FOOTER) }; -PLANE_FOOTER +PAGE_FOOTER end -puts(<<-PLANES_HEADER) +puts(<<-PAGES_HEADER) static uint32_t *general_ci_table[256] = { -PLANES_HEADER +PAGES_HEADER -planes = ["NULL"] * 256 -target_planes.each do |plane, characters| - planes[plane] = plane_name(plane) +pages = ["NULL"] * 256 +target_pages.each do |page, characters| + pages[page] = page_name(page) end -lines = planes.each_slice(2).collect do |planes_group| - formatted_planes = planes_group.collect do |plane| - "%19s" % plane +lines = pages.each_slice(2).collect do |pages_group| + formatted_pages = pages_group.collect do |page| + "%18s" % page end - " " + formatted_planes.join(", ") + " " + formatted_pages.join(", ") end puts(lines.join(",\n")) 
-puts(<<-PLANES_FOOTER) +puts(<<-PAGES_FOOTER) }; -PLANES_FOOTER +PAGES_FOOTER puts(<<-FOOTER) Modified: tool/parser.rb (+14 -14) =================================================================== --- tool/parser.rb 2013-02-11 13:09:07 +0900 (86d56e9) +++ tool/parser.rb 2013-02-11 13:13:43 +0900 (b865282) @@ -29,50 +29,50 @@ end class CTypeUTF8Parser def initialize - @planes = {} + @pages = {} end def parse(input) parse_ctype_utf8(input) - normalize_planes + normalize_pages end - def sorted_planes - @planes.sort_by do |plane, characters| - plane + def sorted_pages + @pages.sort_by do |page, characters| + page end end private def parse_ctype_utf8(input) - current_plane = nil + current_page = nil input.each_line do |line| case line when / plane([\da-fA-F]{2})\[\]=/ - current_plane = $1.to_i(16) - @planes[current_plane] = [] + current_page = $1.to_i(16) + @pages[current_page] = [] when /^\s* \{0x([\da-z]+),0x([\da-z]+),0x([\da-z]+)\}, \s* \{0x([\da-z]+),0x([\da-z]+),0x([\da-z]+)\},?$/ix - next if current_plane.nil? + next if current_page.nil? parsed_characters = $LAST_MATCH_INFO.captures.collect do |value| Unicode.to_utf8(value.to_i(16)) end upper1, lower1, sort1, upper2, lower2, sort2 = parsed_characters - characters = @planes[current_plane] + characters = @pages[current_page] characters << {:upper => upper1, :lower => lower1, :sort => sort1} characters << {:upper => upper2, :lower => lower2, :sort => sort2} when /^\};$/ - current_plane = nil + current_page = nil end end end - def normalize_planes - @planes.each do |plane, characters| + def normalize_pages + @pages.each do |page, characters| characters.each_with_index do |character, i| - character[:base] = Unicode.to_utf8((plane << 8) + i) + character[:base] = Unicode.to_utf8((page << 8) + i) end end end -------------- next part -------------- HTMLの添付ファイルを保管しました...Download