[Julius-cvs 880] CVS update: julius4/libsent/src/hmminfo

Back to archive index

sumom****@users***** sumom****@users*****
2013年 12月 18日 (水) 12:55:21 JST


Index: julius4/libsent/src/hmminfo/put_htkdata_info.c
diff -u julius4/libsent/src/hmminfo/put_htkdata_info.c:1.8 julius4/libsent/src/hmminfo/put_htkdata_info.c:1.9
--- julius4/libsent/src/hmminfo/put_htkdata_info.c:1.8	Fri Jun 21 02:14:21 2013
+++ julius4/libsent/src/hmminfo/put_htkdata_info.c	Wed Dec 18 12:55:21 2013
@@ -12,7 +12,7 @@
  * @author Akinobu LEE
  * @date   Tue Feb 15 23:36:00 2005
  *
- * $Revision: 1.8 $
+ * $Revision: 1.9 $
  * 
  */
 /*
@@ -148,6 +148,7 @@
     fprintf(fp, "no output state\n");
   } else {
     if (s->name != NULL) fprintf(fp, "[~s \"%s\"]\n", s->name);
+    fprintf(fp, "id: %d\n", s->id);
     for (st=0;st<s->nstream;st++) {
       fprintf(fp, "stream %d:", st + 1);
       if (s->w != NULL) {
Index: julius4/libsent/src/hmminfo/rdhmmdef.c
diff -u julius4/libsent/src/hmminfo/rdhmmdef.c:1.8 julius4/libsent/src/hmminfo/rdhmmdef.c:1.9
--- julius4/libsent/src/hmminfo/rdhmmdef.c:1.8	Fri Jun 21 02:14:21 2013
+++ julius4/libsent/src/hmminfo/rdhmmdef.c	Wed Dec 18 12:55:21 2013
@@ -30,7 +30,7 @@
  * @author Akinobu LEE
  * @date   Wed Feb 16 00:17:18 2005
  *
- * $Revision: 1.8 $
+ * $Revision: 1.9 $
  * 
  */
 /*
@@ -43,10 +43,14 @@
 #include <sent/stddefs.h>
 #include <sent/htk_param.h>
 #include <sent/htk_hmm.h>
+#ifdef HAVE_ZLIB
+#include <zlib.h>
+#endif
 
 #define MAXBUFLEN  4096		///< Maximum length of a line in the input
 
 char *rdhmmdef_token;		///< Current token string (GLOBAL)
+static boolean last_line_full = FALSE;
 static char buf[MAXBUFLEN];	///< Local work area for token reading
 static int line;		///< Input Line count
 
@@ -79,17 +83,52 @@
 char *
 read_token(FILE *fp)
 {
+  int len;
+  int bp = 0;
+  int maxlen = MAXBUFLEN;
+  static char delims[] = HMMDEF_DELM;
+
   if ((rdhmmdef_token = mystrtok_quote(NULL, HMMDEF_DELM)) != NULL) {
-    /* return next token */
-    return rdhmmdef_token;
+    /* has token */
+    if (mystrtok_movetonext(NULL, HMMDEF_DELM) != NULL || last_line_full == FALSE) {
+      /* return the current token if it is not the last token in the
+	 buffer, or if the last line read ended with a delimiter */
+      return rdhmmdef_token;
+    } else {
+      /* concatenate the last token with the next line */
+      len = strlen(rdhmmdef_token);
+      memmove(buf, rdhmmdef_token, len);
+      bp = len;
+      maxlen -= len;
+    }
   }
-  /* read new 1 line */
-  if (getl(buf, MAXBUFLEN, fp) == NULL) {
-    rdhmmdef_token = NULL;
-  } else {
-    rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM);
-    line++;
+
+  /* read a new line */
+  while(
+#ifdef HAVE_ZLIB
+	gzgets((gzFile)fp, &(buf[bp]), maxlen) != Z_NULL
+#else
+	fgets(&(buf[bp]), maxlen, fp) != NULL
+#endif
+	) {
+    /* chop delimiters at end of line (incl. newline) */
+    /* if no delimiter at end of line, last_line_full is TRUE */
+    last_line_full = TRUE;
+    len = strlen(buf)-1;
+    while (len >= 0 && strchr(delims, buf[len])) {
+      last_line_full = FALSE;
+      buf[len--] = '\0';
+    }
+    if (buf[0] != '\0') {
+      /* start getting next token */
+      rdhmmdef_token = mystrtok_quote(buf, HMMDEF_DELM);
+      /* increment line */
+      line++;
+      return rdhmmdef_token;
+    }
   }
+  /* on read error or end of input, return NULL */
+  rdhmmdef_token = NULL;
   return rdhmmdef_token;
 }
 
@@ -162,6 +201,52 @@
 }
 #endif
 
+boolean
+htk_hmm_check_sid(HTK_HMM_INFO *hmm)
+{
+  HTK_HMM_State *stmp;
+  boolean *check;
+  int i;
+  boolean ok_p;
+  
+  /* check if each state is assigned a valid sid */
+  check = (boolean *)mymalloc(sizeof(boolean) * hmm->totalstatenum);
+  for(i = 0; i < hmm->totalstatenum; i++) check[i] = FALSE;
+  for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
+    if (stmp->id == -1) {
+      jlog("Error: rdhmmdef: no SID on some states\n");
+      free(check);
+      return FALSE;
+    }
+    if (stmp->id < 0) {
+      jlog("Error: rdhmmdef: invalid SID value: %d\n", stmp->id);
+      free(check);
+      return FALSE;
+    }
+    if (stmp->id >= hmm->totalstatenum) {
+      jlog("Error: rdhmmdef: SID value exceeds the number of states? (%d > %d)\n", stmp->id, hmm->totalstatenum);
+      free(check);
+      return FALSE;
+    }
+    if (check[stmp->id] == TRUE) {
+      jlog("Error: rdhmmdef: duplicate definition to the same SID: %d\n", stmp->id);
+      free(check);
+      return FALSE;
+    }
+    check[stmp->id] = TRUE;
+  }
+  ok_p = TRUE;
+  for(i = 0; i < hmm->totalstatenum; i++) {
+    if (check[i] == FALSE) {
+      jlog("Error: rdhmmdef: missing SID: %d\n", i);
+      ok_p = FALSE;
+    }
+  }
+  free(check);
+
+  return ok_p;
+}
+
 /** 
  * @brief  Main top routine to read in HTK %HMM definition file.
  *
@@ -288,25 +373,54 @@
     return FALSE;
   }
 
-  /* add ID number for all HTK_HMM_State */
-  /* also calculate the maximum number of mixture */
+  /* add ID number for all HTK_HMM_State if not assigned */
   {
     HTK_HMM_State *stmp;
-    int n, max, s, mix;
+    int n;
+    boolean has_sid;
+
+    /* calculate total num and check if any state has a sid */
+    has_sid = FALSE;
     n = 0;
-    max = 0;
     for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
-      for(s=0;s<stmp->nstream;s++) {
-	mix = stmp->pdf[s]->mix_num;
-	if (max < mix) max = mix;
-      }
-      stmp->id = n++;
+      n++;
       if (n >= MAX_STATE_NUM) {
 	jlog("Error: rdhmmdef: too much states in a model > %d\n", MAX_STATE_NUM);
 	return FALSE;
       }
+      if (stmp->id != -1) {
+	has_sid = TRUE;
+      }
     }
     hmm->totalstatenum = n;
+    if (has_sid) {
+      jlog("Stat: rdhmmdef: <SID> found in the definition\n");
+      /* check if each state is assigned a valid sid */
+      if (htk_hmm_check_sid(hmm) == FALSE) {
+	jlog("Error: rdhmmdef: error in SID\n");
+	return FALSE;
+      }
+    } else {
+      /* assign internal sid (will not be saved) */
+      jlog("Stat: rdhmmdef: no <SID> embedded\n");
+      jlog("Stat: rdhmmdef: assign SID by the order of appearance\n");
+      n = hmm->totalstatenum;
+      for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
+	stmp->id = --n;
+      }
+    }
+  }
+  /* calculate the maximum number of mixture */
+  {
+    HTK_HMM_State *stmp;
+    int max, s, mix;
+    max = 0;
+    for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
+      for(s=0;s<stmp->nstream;s++) {
+	mix = stmp->pdf[s]->mix_num;
+	if (max < mix) max = mix;
+      }
+    }
     hmm->maxmixturenum = max;
   }
   /* compute total number of HMM models and maximum length */
Index: julius4/libsent/src/hmminfo/rdhmmdef_state.c
diff -u julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.7 julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.8
--- julius4/libsent/src/hmminfo/rdhmmdef_state.c:1.7	Fri Jun 21 02:14:21 2013
+++ julius4/libsent/src/hmminfo/rdhmmdef_state.c	Wed Dec 18 12:55:21 2013
@@ -12,7 +12,7 @@
  * @author Akinobu LEE
  * @date   Wed Feb 16 03:07:44 2005
  *
- * $Revision: 1.7 $
+ * $Revision: 1.8 $
  * 
  */
 /*
@@ -46,7 +46,7 @@
   for(i=0;i<new->nstream;i++) {
     new->pdf[i] = NULL;
   }
-  new->id = 0;
+  new->id = -1;
   new->next = NULL;
 
   return(new);
@@ -128,6 +128,13 @@
 
   new = state_new(hmm);
 
+  if (currentis("SID")) {
+    read_token(fp);
+    NoTokErr("missing SID value");
+    new->id = atoi(rdhmmdef_token);
+    read_token(fp);
+  }
+
   if (currentis("NUMMIXES")) {
     if (hmm->tmp_mixnum == NULL) {
       hmm->tmp_mixnum = (int *)mybmalloc2(sizeof(int) * hmm->opt.stream_info.num, &(hmm->mroot));
@@ -188,7 +195,7 @@
 {
   HTK_HMM_State *tmp;
 
-  if (currentis("NUMMIXES")||currentis("SWEIGHTS")||currentis("~w")||currentis("STREAM")||currentis("MIXTURE")||currentis("TMIX")||currentis("MEAN")||currentis("~m")||currentis("RCLASS")) {
+  if (currentis("SID")||currentis("NUMMIXES")||currentis("SWEIGHTS")||currentis("~w")||currentis("STREAM")||currentis("MIXTURE")||currentis("TMIX")||currentis("MEAN")||currentis("~m")||currentis("RCLASS")) {
     /* definition: define state data, and return the pointer */
     tmp = state_read(fp, hmm);
     tmp->name = NULL; /* no name */
Index: julius4/libsent/src/hmminfo/read_binhmm.c
diff -u julius4/libsent/src/hmminfo/read_binhmm.c:1.10 julius4/libsent/src/hmminfo/read_binhmm.c:1.11
--- julius4/libsent/src/hmminfo/read_binhmm.c:1.10	Fri Jun 21 02:14:21 2013
+++ julius4/libsent/src/hmminfo/read_binhmm.c	Wed Dec 18 12:55:21 2013
@@ -22,7 +22,7 @@
  * @author Akinobu LEE
  * @date   Wed Feb 16 05:23:59 2005
  *
- * $Revision: 1.10 $
+ * $Revision: 1.11 $
  * 
  */
 /*
@@ -862,13 +862,22 @@
     hmm->totalpdfnum = n;
   }
 
-  /* re-number state id */
+  /* check state id */
   {
     HTK_HMM_State *stmp;
-    int n = 0;
+    int n;
+    boolean has_sid;
+
+    /* check if each state is assigned a valid sid */
+    if (htk_hmm_check_sid(hmm) == FALSE) {
+      jlog("Error: rdhmmdef: error in SID\n");
+      return FALSE;
+    }
+#if 0
     for (stmp = hmm->ststart; stmp; stmp = stmp->next) {
       stmp->id = n++;
     }
+#endif
   }
   /* assign ID number for all HTK_HMM_Trans */
   {
Index: julius4/libsent/src/hmminfo/write_binhmm.c
diff -u julius4/libsent/src/hmminfo/write_binhmm.c:1.8 julius4/libsent/src/hmminfo/write_binhmm.c:1.9
--- julius4/libsent/src/hmminfo/write_binhmm.c:1.8	Fri Jun 21 02:14:21 2013
+++ julius4/libsent/src/hmminfo/write_binhmm.c	Wed Dec 18 12:55:21 2013
@@ -22,7 +22,7 @@
  * @author Akinobu LEE
  * @date   Wed Feb 16 06:03:36 2005
  *
- * $Revision: 1.8 $
+ * $Revision: 1.9 $
  * 
  */
 /*
@@ -31,7 +31,7 @@
  * All rights reserved
  */
 
-/* $Id: write_binhmm.c,v 1.8 2013/06/20 17:14:21 sumomo Exp $ */
+/* $Id: write_binhmm.c,v 1.9 2013/12/18 03:55:21 sumomo Exp $ */
 
 #include <sent/stddefs.h>
 #include <sent/htk_param.h>
@@ -823,8 +823,9 @@
 static int
 qsort_st_index(HTK_HMM_State **s1, HTK_HMM_State **s2)
 {
-  if (*s1 > *s2) return 1;
-  else if (*s1 < *s2) return -1;
+  /* keep ID order */
+  if ((*s1)->id > (*s2)->id) return 1;
+  else if ((*s1)->id < (*s2)->id) return -1;
   else return 0;
 }
 
@@ -918,7 +919,8 @@
 
   while (left < right) {
     mid = (left + right) / 2;
-    if (st_index[mid] < s) {
+    /* search by id */
+    if (st_index[mid]->id < s->id) {
       left = mid + 1;
     } else {
       right = mid;



Julius-cvs メーリングリストの案内
Back to archive index