t-suw****@users*****
t-suw****@users*****
2007年 9月 12日 (水) 22:45:16 JST
Index: AquaSKK/src/utility/utf8util.h
diff -u /dev/null AquaSKK/src/utility/utf8util.h:1.1.2.1
--- /dev/null Wed Sep 12 22:45:16 2007
+++ AquaSKK/src/utility/utf8util.h Wed Sep 12 22:45:16 2007
@@ -0,0 +1,181 @@
+/* -*- C++ -*-
+ *
+ * utf8util.h - UTF-8 utilities
+ *
+ * Copyright (c) 2007 Tomotaka SUWA, All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * 3. Neither the name of the authors nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+ * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef INC__utf8util__
+#define INC__utf8util__
+
+#include <string>
+
+// UTF-8 iterator.
+//
+// Wraps an iterator over bytes and moves it one UTF-8 character at a time.
+// Dereferencing yields the bytes of the current character as a std::string.
+//
+// No range is stored: every move reads the byte at the position it lands on,
+// so the caller is responsible for keeping the walk inside readable storage.
+template <typename Iterator>
+class utf8iterator {
+    Iterator curr_;     // current position in the underlying byte sequence
+
+    // Sequence length announced by the byte at curr_: 1-6 for a lead byte
+    // (the table still accepts the legacy 5- and 6-byte forms), 0 for a
+    // continuation byte (0x80-0xBF) and for 0xFE / 0xFF.
+    unsigned size() const {
+        static const unsigned table[] = {
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, // 0x00
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 0x80
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, // 0xc0
+            2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+            3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, // 0xe0
+            4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 0, 0  // 0xf0
+        };
+
+        return table[static_cast<unsigned char>(*curr_)];
+    }
+
+    // True when the byte at curr_ starts a character.
+    bool leadbyte() const { return size() != 0; }
+
+    // Advance by `count` characters: step one byte, then keep stepping while
+    // the byte under curr_ is not a lead byte.
+    utf8iterator& next(int count = 1) {
+        for(int i = 0; i < count; ++ i) {
+            for(++ curr_; !leadbyte(); ++ curr_) {}
+        }
+        return *this;
+    }
+
+    // Mirror image of next(): retreat by `count` characters.
+    utf8iterator& prev(int count = 1) {
+        for(int i = 0; i < count; ++ i) {
+            for(-- curr_; !leadbyte(); -- curr_) {}
+        }
+        return *this;
+    }
+
+public:
+    // Value-initialize the wrapped iterator; not every iterator type can be
+    // constructed from the integer literal 0.
+    utf8iterator() : curr_() {}
+    utf8iterator(Iterator iter) : curr_(iter) {}
+    utf8iterator& operator=(Iterator iter) {
+        curr_ = iter;
+        return *this;
+    }
+
+    // Underlying byte position.
+    Iterator iterator() const { return curr_; }
+    // Number of bytes in the current character (0 when not on a lead byte).
+    unsigned charsize() const { return size(); }
+
+    // Bytes of the current character; empty when curr_ is not on a lead byte.
+    std::string operator*() const {
+        return std::string(curr_, curr_ + size());
+    }
+
+    utf8iterator& operator++() { return next(); }
+    // Postfix forms move by one whole character, exactly like the prefix
+    // forms, and return the position held before the move.
+    utf8iterator operator++(int) {
+        utf8iterator iter(*this);
+        next();
+        return iter;
+    }
+
+    utf8iterator& operator--() { return prev(); }
+    utf8iterator operator--(int) {
+        utf8iterator iter(*this);
+        prev();
+        return iter;
+    }
+
+    // A non-positive count moves in the opposite direction. Plain negation is
+    // used because this header does not include a declaration of abs().
+    utf8iterator& operator+=(int count) { return 0 < count ? next(count) : prev(-count); }
+    utf8iterator& operator-=(int count) { return 0 < count ? prev(count) : next(-count); }
+
+    // Comparisons are made on the underlying byte positions.
+    friend bool operator==(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ == rhs.curr_; }
+    friend bool operator!=(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ != rhs.curr_; }
+    friend bool operator<(const utf8iterator& lhs, const utf8iterator& rhs) { return lhs.curr_ < rhs.curr_; }
+};
+
+// Number of UTF-8 characters between beg and end.
+//
+// beg is stepped forward one character at a time until it compares equal to
+// end, so end must be reachable from beg on a character boundary.
+template <typename Iterator>
+int operator-(utf8iterator<Iterator> end, utf8iterator<Iterator> beg) {
+    int size = 0;
+    // beg was passed by value, so it is already a private copy and can be
+    // advanced directly; no extra working copy is needed.
+    for(; beg != end; ++ beg) { ++ size; }
+    return size;
+}
+
+// Returns a copy of beg moved by count characters using its += operator.
+template <typename Iterator>
+utf8iterator<Iterator> operator+(utf8iterator<Iterator> beg, int count) {
+    beg += count;   // beg is a by-value copy; the caller's iterator is untouched
+    return beg;
+}
+// Returns a copy of beg moved by count characters using its -= operator.
+template <typename Iterator>
+utf8iterator<Iterator> operator-(utf8iterator<Iterator> beg, int count) {
+    beg -= count;   // beg is a by-value copy; the caller's iterator is untouched
+    return beg;
+}
+
+// Primitive procedures on UTF-8 encoded std::string values.
+//
+// The functions are declared inline because they are defined in a header:
+// a non-inline definition would be emitted by every translation unit that
+// includes this file.
+namespace utf8 {
+    typedef utf8iterator<std::string::iterator> iterator;
+    typedef utf8iterator<std::string::const_iterator> const_iterator;
+
+    // Length of str in UTF-8 characters, i.e. the number of character steps
+    // from str.begin() to str.end().
+    //
+    // NOTE(review): the final step reads the byte at str.end(); this relies
+    // on the string's terminator being readable through the iterator --
+    // confirm for the standard library in use.
+    inline unsigned length(const std::string& str) {
+        return static_cast<unsigned>(const_iterator(str.end()) - const_iterator(str.begin()));
+    }
+
+    // Insert str into target at a position counted in characters from the end.
+    //
+    // A non-negative offset (or an empty target) appends. A negative offset
+    // inserts |offset| characters before the end; if target holds fewer
+    // characters than that, str is inserted at the front.
+    //
+    // Example:
+    //   std::string str = "文字列";
+    //   utf8::push(str, "a", -3);  // "a文字列"
+    //   utf8::push(str, "b");      // "a文字列b"
+    //
+    inline void push(std::string& target, const std::string& str, int offset = 0) {
+        if(0 <= offset || target.empty()) {
+            target += str;
+            return;
+        }
+
+        // Walk back one character per step, never moving past begin(): the
+        // bound is checked before every byte is read.
+        std::string::iterator pos = target.end();
+        for(int i = offset; i < 0 && pos != target.begin(); ++ i) {
+            do { -- pos; } while(pos != target.begin() && iterator(pos).charsize() == 0);
+        }
+        target.insert(pos - target.begin(), str);
+    }
+
+    // Step back one character from the position selected by offset and erase
+    // the character found there.
+    //
+    // offset is counted in characters from the end; positive values are
+    // treated as 0. Nothing is erased when target has too few characters.
+    //
+    // Example:
+    //   std::string str = "文字列";
+    //   utf8::pop(str);      // "文字"
+    //   utf8::pop(str, -1);  // "字"
+    //
+    inline void pop(std::string& target, int offset = 0) {
+        std::string::iterator pos = target.end();
+
+        // |offset| + 1 steps back in total; give up as soon as the front of
+        // the string is reached with steps still outstanding.
+        for(int i = (0 < offset ? 0 : offset); i <= 0; ++ i) {
+            if(pos == target.begin()) {
+                return;
+            }
+            do { -- pos; } while(pos != target.begin() && iterator(pos).charsize() == 0);
+        }
+        target.erase(pos - target.begin(), iterator(pos).charsize());
+    }
+}
+
+#endif