
// Not assigned or read in this part of the file.
var editCPod;

// NOTE(review): presumably the number of phonetic series in Wieger's list - confirm.
var Wieger = 858;

// Not used in this part of the file.
var exist = [];

// Tables filled by d0(): phoneticList holds listNode entries (index i),
// phoneticAll holds allNode entries (index j).
var phoneticList = [];
var phoneticAll = [];

// rpinyin : character -> index into phoneticList (set by d0)
// xpinyin : pinyin syllable -> array of characters (filled by pronunciationDistance)
var rpinyin = [];
var xpinyin = [];

// Running totals accumulated by d0(): tTotal sums the 'c' argument,
// cTotal sums the nchars of every listNode created.
var tTotal = 0;
var cTotal = 0;

// Delimiters of the (X+Y) notation used when no single unicode character exists.
var openpar = '(';
var closepar = ')';

// Initials that initialDistance() treats as close to each other (distance 1).
var initialGroups = [
  ['b', 'p', 'm', 'f'],
  ['d', 't', 'n', 'l'],
  ['g', 'k', 'h'],
  ['j', 'q', 'x'],
  ['zh', 'ch', 'sh'],
  ['r', 'z', 'c', 's'],
  ['w', 'y']
];

// Finals that finalDistance() treats as close to each other (distance 1).
var finalGroups = [
  ['ai', 'ei'],
  ['e', 'ei', 'ie'],
  ['o', 'ou', 'ao'],
  ['en', 'un']
];

// Pairs of finals where one is a substring of the other:
// ['a','ai'],['an','ang'],['an','ian'],['ang','iang'],['en','eng'],['in','ing'],['uan','uang'],['ü','üe'],['ü','ün']
//

var voweList = 'aeiouüv_'; // y treated as initial, not vowel, and '_' is substituted for 'i' in zi,zhi,shi etc.

// Spelling corrections applied before a syllable is split into initial and final:
// the 'i' of zi/ci/si/ri/zhi/chi/shi becomes '_', and the written 'u' after
// j/q/x/y becomes 'ü'. Syllables that are not listed are used unchanged.
var correctedPinyin = [];

(function (table) {
  var n;
  for (n = 0; n < table.length; n++) {
    correctedPinyin[table[n][0]] = table[n][1];
  }
})([
  ['chi',  'ch_'],
  ['shi',  'sh_'],
  ['zhi',  'zh_'],
  ['ci',   'c_'],
  ['si',   's_'],
  ['zi',   'z_'],
  ['ri',   'r_'],
  ['ju',   'jü'],
  ['juan', 'jüan'],
  ['jue',  'jüe'],
  ['jun',  'jün'],
  ['qu',   'qü'],
  ['quan', 'qüan'],
  ['que',  'qüe'],
  ['qun',  'qün'],
  ['xu',   'xü'],
  ['xuan', 'xüan'],
  ['xue',  'xüe'],
  ['xun',  'xün'],
  ['ye',   'ie'],
  ['yu',   'yü'],
  ['yuan', 'yüan'],
  ['yue',  'yüe'],
  ['yun',  'yün']
]);

// Returns the 1-based position of q inside voweList, or 0 (falsy) when q is not listed.
function isvowel(q) {
  var position = voweList.indexOf(q);
  return position + 1;
}

// Returns the final (vowel part) of syllable p, after applying the spelling
// correction from correctedPinyin when one exists for p.
function correctedFinal(p) {
  var fixed = correctedPinyin[p];
  return striptovowel(fixed == undefined ? p : fixed);
}

// Returns the index of the first character of s accepted by isvowel(),
// or s.length when s contains none.
function findvowel(s) {
  var pos = 0;

  while (pos < s.length && !isvowel(s.charAt(pos))) pos++;
  return pos;
}

// Drops the leading initial of s: returns s from its first vowel onwards
// (an empty string when s has no vowel).
function striptovowel(s) {
  var start = findvowel(s);
  return s.substring(start);
}

// Returns true when p and q occupy two different positions of the same
// group in t (an array of arrays); the order of p and q does not matter.
function matchingPair(t,p,q) {
  var groupIndex, group, hi, lo;

  for (groupIndex = 0; groupIndex < t.length; groupIndex++) {
    group = t[groupIndex];
    // visit every unordered pair (lo < hi) of the group
    for (hi = 1; hi < group.length; hi++) {
      for (lo = 0; lo < hi; lo++) {
        if ((group[hi] == p && group[lo] == q) ||
            (group[hi] == q && group[lo] == p)) {
          return true;
        }
      }
    }
  }
  return false;
}

// Distance between two initials:
//   0 : stringCompare() reports them equal
//   1 : both belong to one group of initialGroups
//   2 : anything else, including an empty initial on either side
// stringCompare() is not defined in this part of the file.
function initialDistance(p,q) {
  if (p.length==0 || q.length==0) return 2;
  if (stringCompare(p,q)==0) return 0;
  return matchingPair(initialGroups,p,q) ? 1 : 2;
}


// Distance between two finals:
//   0   : stringCompare() reports them equal
//   0.5 : one is a substring of the other, e.g. a/ai an/ang or ang/iang
//   1   : both belong to one group of finalGroups
//   2   : anything else, including an empty final on either side
// stringCompare() is not defined in this part of the file.
function finalDistance(p,q) {
  var contained;

  if (p.length==0 || q.length==0) return 2;
  if (stringCompare(p,q)==0) return 0;

  contained = (p.indexOf(q) >= 0) || (q.indexOf(p) >= 0);
  if (contained) return 0.5;

  return matchingPair(finalGroups,p,q) ? 1 : 2;
}

// Distance between two pinyin syllables: the initial part and the final part
// are scored separately and the two scores are added.
// For each part: both sides empty scores 0, exactly one side empty scores 1,
// otherwise initialDistance() / finalDistance() decides.
function pinyinDistance(p,q) {
  var splitP = findvowel(p);
  var splitQ = findvowel(q);

  var initialP = p.substring(0, splitP);
  var initialQ = q.substring(0, splitQ);
  var finalP = p.substring(splitP);
  var finalQ = q.substring(splitQ);

  var initialScore, finalScore;

  if (initialP.length == 0 || initialQ.length == 0) {
    initialScore = (initialP.length == 0 && initialQ.length == 0) ? 0 : 1;
  } else {
    initialScore = initialDistance(initialP, initialQ);
  }

  if (finalP.length == 0 || finalQ.length == 0) {
    finalScore = (finalP.length == 0 && finalQ.length == 0) ? 0 : 1;
  } else {
    finalScore = finalDistance(finalP, finalQ);
  }

  return initialScore + finalScore;
}

// Computes the scattering coefficient of an array of pinyin pronunciations and
// records character x in the global cross-reference array xpinyin.
//
// xl : array of pinyin pronunciations
// p  : root pronunciation, used as the xpinyin key when xl has fewer than two entries
// x  : character to append under each pronunciation in xpinyin
//
// Returns the sum of pinyinDistance() over every pair of entries of xl (after
// the correctedPinyin substitution), divided by xl.length; 0 when xl is empty.
function pronunciationDistance(xl,p,x) {
  var total = 0;
  var count = xl.length;
  var k, g, sound, corrected, other, otherCorrected, bucket;

  if (count > 1) {    // more than one pronunciation
    for (k = 0; k < count; k++) {
      sound = xl[k];
      corrected = correctedPinyin[sound];
      if (corrected == undefined) corrected = sound;

      // compare with every earlier pronunciation, so each pair is scored once
      for (g = 0; g < k; g++) {
        other = xl[g];
        otherCorrected = correctedPinyin[other];
        if (otherCorrected == undefined) otherCorrected = other;
        total += pinyinDistance(otherCorrected, corrected);
      }

      // add alternate pronunciations (keyed by the uncorrected spelling)
      if (xpinyin[sound] == undefined) xpinyin[sound] = [];
      bucket = xpinyin[sound];
      bucket[bucket.length] = x;
    }
  }
  else {    // zero or one pronunciation: file x under the root pronunciation
    if (xpinyin[p] == undefined) xpinyin[p] = [];
    bucket = xpinyin[p];
    bucket[bucket.length] = x;
  }

  // guard the empty list, which would otherwise yield 0/0
  return count ? total / count : 0;
}

 // Constructor for an entry of phoneticAll.
 // j  : index, stored in ndex, ndexpinyin and ndexfinal
 // p  : pinyin sound
 // f  : final of the pinyin sound
 // r  : corrected final
 // x  : character
 // cp : value stored in CPod
 function allNode(j,p,f,r,x,cp) {
  var node = this;

  node.ndex       = j;
  node.ndexpinyin = j;
  node.ndexfinal  = j;
  node.pinyin     = p;
  node.final      = f;
  node.corrected  = r;
  node.char       = x;
  node.CPod       = cp;
  node.list       = 0; // reference to listNode entry
 }

  // this.prototype.xxx = function() {}
  // code for move method goes here - include in allNode/listNode definition

 // Constructor for an entry of phoneticList.
 // j  : index, stored in ndex, ndexpinyin and ndexfinal
 // p  : pinyin sound           f  : final of the pinyin sound
 // r  : corrected final        x  : character
 // n  : number of strokes      cp : CPod post index
 // a  : stored as npinyin      b  : stored as nfirst      c : stored as ntotal
 // ap : stored as alternate    nc : stored as ncompound   cl : character list
 // pdistance and nscatter start at 0 and 1.
 function listNode(j,p,f,r,x,n,cp,a,b,c,ap,nc,cl) {
  var entry = this;

  entry.ndex       = j;
  entry.ndexpinyin = j;
  entry.ndexfinal  = j;
  entry.pinyin     = p;
  entry.corrected  = r;
  entry.char       = x;
  entry.strokes    = n;
  entry.CPod       = cp;
  entry.final      = f;
  entry.npinyin    = a;
  entry.nfirst     = b;
  entry.ntotal     = c;
  entry.pdistance  = 0;
  entry.nscatter   = 1;
  entry.alternate  = ap;
  entry.ncompound  = nc;
  entry.charlist   = cl;
  // unlike ntotal, nchars does include the core phonetic, but only lists duplicate characters once;
  // cl.length wouldn't handle cases where no single unicode exist, notation (X+Y)
  entry.nchars     = length2(cl);
 }
/*
 function dedup (s) {
 var i,r;
  
  r = '';
  for (i=0;i<s.length;i++) {
    c = s.charAt(i);
    if (r.indexOf(c)<0) r+= c;
  }
  return r;
 }
*/

 // Removes repeated characters from s, keeping the first occurrence of each.
 // A parenthesised compound, notation (X+Y), stands for a character that has
 // no single unicode: it is copied through untouched, and the characters
 // inside it do not count as "already seen" for the standalone characters.
 // A UTF-16 surrogate pair is handled as one character, so characters
 // outside the Basic Multilingual Plane are never split.
 function dedup2 (s) {
  var result = '';
  var seen = {};            // standalone characters already emitted
  var inCompound = false;   // true between openpar and closepar
  var i, ch, code, next;

  for (i = 0; i < s.length; i++) {
    ch = s.charAt(i);

    if (inCompound) {
      result += ch;
      if (ch == closepar) inCompound = false;
      continue;
    }

    if (ch == openpar) {
      result += ch;
      inCompound = true;
      continue;
    }

    // keep the two halves of a surrogate pair together
    code = s.charCodeAt(i);
    if (code >= 0xD800 && code <= 0xDBFF) {
      next = s.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) {
        ch += s.charAt(i + 1);
        i++;
      }
    }

    if (seen[ch] !== true) {
      seen[ch] = true;
      result += ch;
    }
  }
  return result;
 }

 // Length of a list of characters. A parenthesised compound, notation (X+Y),
 // stands for a character that has no single unicode and counts as one.
 // A UTF-16 surrogate pair also counts as one character.
 function length2 (s) {
  var count = 0;
  var inCompound = false;   // true between openpar and closepar
  var i, ch, code, next;

  for (i = 0; i < s.length; i++) {
    ch = s.charAt(i);

    if (inCompound) {
      if (ch == closepar) inCompound = false;
      continue;
    }

    count++;
    if (ch == openpar) {
      inCompound = true;   // the opening parenthesis stands for the whole compound
      continue;
    }

    // step over the second half of a surrogate pair
    code = s.charCodeAt(i);
    if (code >= 0xD800 && code <= 0xDBFF) {
      next = s.charCodeAt(i + 1);
      if (next >= 0xDC00 && next <= 0xDFFF) i++;
    }
  }
  return count;
 }

 // Loads one phonetic entry: always adds an allNode to phoneticAll and, when
 // the entry has both a pinyin sound and a CPod index, also adds a listNode
 // to phoneticList and updates rpinyin, tTotal and cTotal.
 //
 // p  : pinyin sound (an empty string marks a separator entry)
 // x  : phonetic character
 // n  : number of strokes
 // cp : CPod post index
 // a  : number of pinyin sounds
 // b  : number of characters with 1st sound **not including phonetic element itself**
 // c  : total number of characters
 // ap : alternate pinyin pronunciations ('+' separated list, excluding core phonetic)
 // nc : number of compounds for each pronunciation ('+' separated list)
 //      b == 1st nc element
 //      c == sum of nc elements
 // cl : character list (excluding core phonetic)
 //
 // The counters i (phoneticList index) and j (phoneticAll index) are globals
 // declared outside this part of the file.
 function d0(p,x,n,cp,a,b,c,ap,nc,cl) {
  var stripped, corrected, keychar;
  var node1, node2;
  var counts, count, sum, sumSquares, k;
  var sounds;

  j++;
  if (!p.length) {   // only in the case of separators
    node1 = new allNode(j,p,p,p,x,0);
    phoneticAll[j] = node1;
    return;
  }

  stripped = striptovowel(p);
  corrected = correctedFinal(p);
  // NOTE(review): the last argument of allNode is stored as CPod, yet 'c'
  // (total number of characters) is passed here rather than 'cp' - confirm.
  node1 = new allNode(j,p,stripped,corrected,x,c);
  phoneticAll[j] = node1;

  if (!cp) return;   // only load characters with both pinyin and CPod entry

  i++;
  rpinyin[x] = i;
  node1.list = i;

  // if character contains + sign it's a unicode compound, so put in parentheses
  if (x.indexOf('+') < 0) keychar = x.charAt(0);
  else keychar = openpar + x + closepar;

  // add char to list of compounds, but remove extra information (i.e. traditional form)
  node2 = new listNode(j,p,stripped,corrected,x,n,cp,a,b,c,ap,nc,dedup2(keychar+cl));
  phoneticList[i] = node2;
  tTotal += c;
  cTotal += node2.nchars;

  if (nc != undefined) {
    counts = nc.split('+');
    sum = sumSquares = 0;
    for (k = 0; k < counts.length; k++) {
      count = parseInt(counts[k],10);
      if (k == 0) count++;  // phonetic element always comes first but is not counted in the list of compounds
      sumSquares += count*count;
      sum += count;
    }
    node2.nscatter = Math.sqrt((sum*sum)/sumSquares);
  }

  if (ap != undefined) {   // ap : alternate pinyin pronunciations
    sounds = ap.length ? ap.split('+') : [];
    sounds[sounds.length] = p;   // add root pronunciation to list
    node2.pdistance = pronunciationDistance(sounds,p,x);
  }

  node2.scattercoeff = node2.pdistance * node2.nscatter;
 }

// dbList : every core dbNode, in the order d1() created them
// dbCore : phonetic index -> core dbNode (filled by d1, read by db)
var dbList = [];
var dbCore = {};

// Loader state shared by d1/d2/db/dc/dd/de:
// lastCore : current core dbNode (0 until one is set)
// lastNdex : phonetic index of the current core
// lastRoot : node that receives the next compounds/comment (falsy: use lastCore)
// lastPdex : position in lastCore.phonetics of the next entry handled by dc()
var lastCore = 0,
    lastNdex = 0,
    lastRoot,
    lastPdex;

// Constructor for a database entry (a core phonetic or one of its phonetics).
// ndex       : phonetic index
// hanzi      : character, stored in the 'char' property
// pinyin     : pinyin sound
// definition : definition text (may be undefined and filled in later)
// The 'charlist' property (core only) is not created here.
function dbNode(ndex,hanzi,pinyin,definition) {
  var entry = this;

  entry.ndex       = ndex;
  entry.char       = hanzi;
  entry.loaded     = false;
  entry.core       = '';
  entry.pinyin     = pinyin;
  entry.definition = definition;
  entry.db         = {};
  entry.compounds  = [];
  entry.phonetics  = [entry];   // slot 0 is the node itself (core)
  entry.alternate  = [];
  entry.comment    = '';
}

// Constructor for a compound record: a plain holder for hanzi, pinyin and definition.
function dbSub(hanzi,pinyin,definition) {
  var record = this;

  record.hanzi      = hanzi;
  record.pinyin     = pinyin;
  record.definition = definition;
}

// Registers a core phonetic character and makes it the current core.
// ndex   : phonetic index 1..870+
// hanzi  : the core character
// pinyin : its pinyin sound
// The new node (definition left undefined) is stored in dbCore[ndex] and
// appended to dbList; lastPdex and lastRoot are reset to 0.
function d1(ndex,hanzi,pinyin) {
  var core = new dbNode(ndex,hanzi,pinyin,undefined);

  lastCore = core;
  lastNdex = ndex;
  lastPdex = 0;
  lastRoot = 0;

  dbCore[ndex] = core;
  dbList.push(core);
}

// Registers a phonetic character under the current core (lastCore).
// hanzi  : the character
// pinyin : its pinyin sound
// The new node is appended to lastCore.phonetics and becomes lastRoot. The
// first node seen for a given hanzi is indexed in lastCore.db; a later node
// for the same hanzi is an alternate pronunciation and is appended to the
// 'alternate' list of that first node instead.
// When there is no current core, an alert is shown and nothing is registered.
function d2(hanzi,pinyin) {
  var node, siblings, existing, alternates;

  node = new dbNode(lastNdex,hanzi,pinyin,undefined);
  if (!lastCore) {
    alert ('error, no current core phonetic for '+hanzi);
    return;
  }

  lastRoot = node;
  node.core = lastCore;

  siblings = lastCore.phonetics;
  siblings[siblings.length] = node;

  existing = lastCore.db[hanzi];
  if (existing == undefined) {
    lastCore.db[hanzi] = node;
  }
  else {
    // this is an alternate pronunciation
    alternates = existing.alternate;
    alternates[alternates.length] = node;
  }
}

// Attaches a definition to the core phonetic already stored in dbCore[ndex]
// and makes that node the current core again.
// ndex       : phonetic index 1..870+
// hanzi      : not used by this function
// pinyin     : not used by this function
// definition : definition text stored on the core node
function db(ndex,hanzi,pinyin,definition) {
  var core = dbCore[ndex];

  core.definition = definition;

  lastCore = core;
  lastNdex = ndex;
  lastPdex = 1; // because node.phonetics[0] is core itself
  lastRoot = 0;
}

// Attaches a definition to the next phonetic of the current core: the node
// at lastCore.phonetics[lastPdex]. lastPdex is then advanced and that node
// becomes lastRoot.
// hanzi      : only used in the error message
// pinyin     : not used by this function
// definition : definition text stored on the node
// When there is no current core, an alert is shown and nothing changes.
function dc(hanzi,pinyin,definition) {
  var entry;

  if (!lastCore) {
    alert ('error, no current core phonetic for '+hanzi);
    return;
  }

  entry = lastCore.phonetics[lastPdex++];
  entry.definition = definition;
  lastRoot = entry;
}

// Appends a compound record (dbSub) to the 'compounds' list of lastRoot,
// or of lastCore when lastRoot is falsy.
function dd(hanzi,pinyin,definition) {
  var owner = lastRoot ? lastRoot : lastCore;
  var list = owner.compounds;
  var record = new dbSub(hanzi,pinyin,definition);

  list[list.length] = record;
}

//  if (lastRoot) alert('lastCore='+lastCore+' lastRoot='+lastRoot+' root: '+lastRoot.char+' : '+hanzi+','+definition)
//           else alert('lastCore='+lastCore+' lastRoot='+lastRoot+' core: '+lastCore.char+' : '+hanzi+','+definition);

// Stores a comment on lastRoot, or on lastCore when lastRoot is falsy.
function de(comment) {
  var target = lastRoot || lastCore;
  target.comment = comment;
}

// Loads another JavaScript file by appending a <script> element, whose src is
// jsfile, to the first <head> element of the document.
//
// source http://www.wait-till-i.com/2005/02/16/conditional-loading-of-large-javascript-files/
//    and http://elmicox.blogspot.com/2006/12/include-em-javascript.html
function jsinclude(jsfile) {
  var scriptTag = document.createElement('script');
  var head;

  scriptTag.setAttribute('type','text/javascript');
  scriptTag.setAttribute('src',jsfile);

  head = document.getElementsByTagName('head')[0];
  head.appendChild(scriptTag);
}

