汉字和拼音的互相转换

风行水上 @ 2012-07-05 14:46:01
标签:

    汉字转拼音

    利用GB2312编码一级汉字的排序特性

    GB2312简体中文编码中16-55区为一级汉字,按拼音排序,共3755个。据此可以利用表查询的方法得到汉字的拼音。

    具体做法是用汉字的GB2312编码作为关键字,在下面的表中用二分法查找起始编码不大于该编码的最后一项,该项对应的拼音即为所求。

    下面是GB2312编码中一级汉字的拼音查找表:

    // Pinyin lookup table for GB2312 level-1 Hanzi.
    //
    // Layout: a flat array of alternating entries -- a pinyin syllable (string)
    // followed by a two-byte GB2312 code (number), 396 pairs in total.
    // Judging by the note on the closing line, each code is the first code of
    // that syllable's range; the range extends up to just before the code of
    // the next pair.
    //
    // The codes are listed in strictly ascending order, so the table can be
    // binary-searched by a character's GB2312 code. A search must step over
    // pairs: even indices hold the pinyin, odd indices hold the code.
    var LUT = [
         'a', 0xb0a1,    'ai', 0xb0a3,    'an', 0xb0b0,   'ang', 0xb0b9,    'ao', 0xb0bc,
        'ba', 0xb0c5,   'bai', 0xb0d7,   'ban', 0xb0df,  'bang', 0xb0ee,   'bao', 0xb0fa,
       'bei', 0xb1ad,   'ben', 0xb1bc,  'beng', 0xb1c0,    'bi', 0xb1c6,  'bian', 0xb1de,
      'biao', 0xb1ea,   'bie', 0xb1ee,   'bin', 0xb1f2,  'bing', 0xb1f8,    'bo', 0xb2a3,
        'bu', 0xb2b8,    'ca', 0xb2c1,   'cai', 0xb2c2,   'can', 0xb2cd,  'cang', 0xb2d4,
       'cao', 0xb2d9,    'ce', 0xb2de,  'ceng', 0xb2e3,   'cha', 0xb2e5,  'chai', 0xb2f0,
      'chan', 0xb2f3, 'chang', 0xb2fd,  'chao', 0xb3ac,   'che', 0xb3b5,  'chen', 0xb3bb,
     'cheng', 0xb3c5,   'chi', 0xb3d4, 'chong', 0xb3e4,  'chou', 0xb3e9,   'chu', 0xb3f5,
     'chuai', 0xb4a7, 'chuan', 0xb4a8,'chuang', 0xb4af,  'chui', 0xb4b5,  'chun', 0xb4ba,
      'chuo', 0xb4c1,    'ci', 0xb4c3,  'cong', 0xb4cf,   'cou', 0xb4d5,    'cu', 0xb4d6,
      'cuan', 0xb4da,   'cui', 0xb4dd,   'cun', 0xb4e5,   'cuo', 0xb4e8,    'da', 0xb4ee,
       'dai', 0xb4f4,   'dan', 0xb5a2,  'dang', 0xb5b1,   'dao', 0xb5b6,    'de', 0xb5c2,
      'deng', 0xb5c5,    'di', 0xb5cc,  'dian', 0xb5df,  'diao', 0xb5ef,   'die', 0xb5f8,
      'ding', 0xb6a1,   'diu', 0xb6aa,  'dong', 0xb6ab,   'dou', 0xb6b5,    'du', 0xb6bc,
      'duan', 0xb6cb,   'dui', 0xb6d1,   'dun', 0xb6d5,   'duo', 0xb6de,     'e', 0xb6ea,
        'en', 0xb6f7,    'er', 0xb6f8,    'fa', 0xb7a2,   'fan', 0xb7aa,  'fang', 0xb7bb,
       'fei', 0xb7c6,   'fen', 0xb7d2,  'feng', 0xb7e1,    'fo', 0xb7f0,   'fou', 0xb7f1,
        'fu', 0xb7f2,    'ga', 0xb8c1,   'gai', 0xb8c3,   'gan', 0xb8c9,  'gang', 0xb8d4,
       'gao', 0xb8dd,    'ge', 0xb8e7,   'gei', 0xb8f8,   'gen', 0xb8f9,  'geng', 0xb8fb,
      'gong', 0xb9a4,   'gou', 0xb9b3,    'gu', 0xb9bc,   'gua', 0xb9ce,  'guai', 0xb9d4,
      'guan', 0xb9d7, 'guang', 0xb9e2,   'gui', 0xb9e5,   'gun', 0xb9f5,   'guo', 0xb9f8,
        'ha', 0xb9fe,   'hai', 0xbaa1,   'han', 0xbaa8,  'hang', 0xbabb,   'hao', 0xbabe,
        'he', 0xbac7,   'hei', 0xbad9,   'hen', 0xbadb,  'heng', 0xbadf,  'hong', 0xbae4,
       'hou', 0xbaed,    'hu', 0xbaf4,   'hua', 0xbba8,  'huai', 0xbbb1,  'huan', 0xbbb6,
     'huang', 0xbbc4,   'hui', 0xbbd2,   'hun', 0xbbe7,   'huo', 0xbbed,    'ji', 0xbbf7,
       'jia', 0xbcce,  'jian', 0xbcdf, 'jiang', 0xbda9,  'jiao', 0xbdb6,   'jie', 0xbdd2,
       'jin', 0xbded,  'jing', 0xbea3, 'jiong', 0xbebc,   'jiu', 0xbebe,    'ju', 0xbecf,
      'juan', 0xbee8,   'jue', 0xbeef,   'jun', 0xbef9,    'ka', 0xbfa6,   'kai', 0xbfaa,
       'kan', 0xbfaf,  'kang', 0xbfb5,   'kao', 0xbfbc,    'ke', 0xbfc0,   'ken', 0xbfcf,
      'keng', 0xbfd3,  'kong', 0xbfd5,   'kou', 0xbfd9,    'ku', 0xbfdd,   'kua', 0xbfe4,
      'kuai', 0xbfe9,  'kuan', 0xbfed, 'kuang', 0xbfef,   'kui', 0xbff7,   'kun', 0xc0a4,
       'kuo', 0xc0a8,    'la', 0xc0ac,   'lai', 0xc0b3,   'lan', 0xc0b6,  'lang', 0xc0c5,
       'lao', 0xc0cc,    'le', 0xc0d5,   'lei', 0xc0d7,  'leng', 0xc0e2,    'li', 0xc0e5,
       'lia', 0xc1a9,  'lian', 0xc1aa, 'liang', 0xc1b8,  'liao', 0xc1c3,   'lie', 0xc1d0,
       'lin', 0xc1d5,  'ling', 0xc1e1,   'liu', 0xc1ef,  'long', 0xc1fa,   'lou', 0xc2a5,
        'lu', 0xc2ab,    'lv', 0xc2bf,  'luan', 0xc2cd,   'lue', 0xc2d3,   'lun', 0xc2d5,
       'luo', 0xc2dc,    'ma', 0xc2e8,   'mai', 0xc2f1,   'man', 0xc2f7,  'mang', 0xc3a2,
       'mao', 0xc3a8,    'me', 0xc3b4,   'mei', 0xc3b5,   'men', 0xc3c5,  'meng', 0xc3c8,
        'mi', 0xc3d0,  'mian', 0xc3de,  'miao', 0xc3e7,   'mie', 0xc3ef,   'min', 0xc3f1,
      'ming', 0xc3f7,   'miu', 0xc3fd,    'mo', 0xc3fe,   'mou', 0xc4b1,    'mu', 0xc4b4,
        'na', 0xc4c3,   'nai', 0xc4ca,   'nan', 0xc4cf,  'nang', 0xc4d2,   'nao', 0xc4d3,
        'ne', 0xc4d8,   'nei', 0xc4d9,   'nen', 0xc4db,  'neng', 0xc4dc,    'ni', 0xc4dd,
      'nian', 0xc4e8, 'niang', 0xc4ef,  'niao', 0xc4f1,   'nie', 0xc4f3,   'nin', 0xc4fa,
      'ning', 0xc4fb,   'niu', 0xc5a3,  'nong', 0xc5a7,    'nu', 0xc5ab,    'nv', 0xc5ae,
      'nuan', 0xc5af,   'nue', 0xc5b0,   'nuo', 0xc5b2,     'o', 0xc5b6,    'ou', 0xc5b7,
        'pa', 0xc5be,   'pai', 0xc5c4,   'pan', 0xc5ca,  'pang', 0xc5d2,   'pao', 0xc5d7,
       'pei', 0xc5de,   'pen', 0xc5e7,  'peng', 0xc5e9,    'pi', 0xc5f7,  'pian', 0xc6aa,
      'piao', 0xc6ae,   'pie', 0xc6b2,   'pin', 0xc6b4,  'ping', 0xc6b9,    'po', 0xc6c2,
        'pu', 0xc6cb,    'qi', 0xc6da,   'qia', 0xc6fe,  'qian', 0xc7a3, 'qiang', 0xc7b9,
      'qiao', 0xc7c1,   'qie', 0xc7d0,   'qin', 0xc7d5,  'qing', 0xc7e0, 'qiong', 0xc7ed,
       'qiu', 0xc7ef,    'qu', 0xc7f7,  'quan', 0xc8a6,   'que', 0xc8b1,   'qun', 0xc8b9,
       'ran', 0xc8bb,  'rang', 0xc8bf,   'rao', 0xc8c4,    're', 0xc8c7,   'ren', 0xc8c9,
      'reng', 0xc8d3,    'ri', 0xc8d5,  'rong', 0xc8d6,   'rou', 0xc8e0,    'ru', 0xc8e3,
      'ruan', 0xc8ed,   'rui', 0xc8ef,   'run', 0xc8f2,   'ruo', 0xc8f4,    'sa', 0xc8f6,
       'sai', 0xc8f9,   'san', 0xc8fd,  'sang', 0xc9a3,   'sao', 0xc9a6,    'se', 0xc9aa,
       'sen', 0xc9ad,  'seng', 0xc9ae,   'sha', 0xc9af,  'shai', 0xc9b8,  'shan', 0xc9ba,
     'shang', 0xc9ca,  'shao', 0xc9d2,   'she', 0xc9dd,  'shen', 0xc9e9, 'sheng', 0xc9f9,
       'shi', 0xcaa6,  'shou', 0xcad5,   'shu', 0xcadf,  'shua', 0xcba2, 'shuai', 0xcba4,
     'shuan', 0xcba8,'shuang', 0xcbaa,  'shui', 0xcbad,  'shun', 0xcbb1,  'shuo', 0xcbb5,
        'si', 0xcbb9,  'song', 0xcbc9,   'sou', 0xcbd1,    'su', 0xcbd4,  'suan', 0xcbe1,
       'sui', 0xcbe4,   'sun', 0xcbef,   'suo', 0xcbf2,    'ta', 0xcbfa,   'tai', 0xcca5,
       'tan', 0xccae,  'tang', 0xccc0,   'tao', 0xcccd,    'te', 0xccd8,  'teng', 0xccd9,
        'ti', 0xccdd,  'tian', 0xccec,  'tiao', 0xccf4,   'tie', 0xccf9,  'ting', 0xccfc,
      'tong', 0xcda8,   'tou', 0xcdb5,    'tu', 0xcdb9,  'tuan', 0xcdc4,   'tui', 0xcdc6,
       'tun', 0xcdcc,   'tuo', 0xcdcf,    'wa', 0xcdda,   'wai', 0xcde1,   'wan', 0xcde3,
      'wang', 0xcdf4,   'wei', 0xcdfe,   'wen', 0xcec1,  'weng', 0xcecb,    'wo', 0xcece,
        'wu', 0xced7,    'xi', 0xcef4,   'xia', 0xcfb9,  'xian', 0xcfc6, 'xiang', 0xcfe0,
      'xiao', 0xcff4,   'xie', 0xd0a8,   'xin', 0xd0bd,  'xing', 0xd0c7, 'xiong', 0xd0d6,
       'xiu', 0xd0dd,    'xu', 0xd0e6,  'xuan', 0xd0f9,   'xue', 0xd1a5,   'xun', 0xd1ab,
        'ya', 0xd1b9,   'yan', 0xd1c9,  'yang', 0xd1ea,   'yao', 0xd1fb,    'ye', 0xd2ac,
        'yi', 0xd2bb,   'yin', 0xd2f0,  'ying', 0xd3a2,    'yo', 0xd3b4,  'yong', 0xd3b5,
       'you', 0xd3c4,    'yu', 0xd3d9,  'yuan', 0xd4a7,   'yue', 0xd4bb,   'yun', 0xd4c5,
        'za', 0xd4d1,   'zai', 0xd4d4,   'zan', 0xd4db,  'zang', 0xd4df,   'zao', 0xd4e2,
        'ze', 0xd4f0,   'zei', 0xd4f4,   'zen', 0xd4f5,  'zeng', 0xd4f6,   'zha', 0xd4fa,
      'zhai', 0xd5aa,  'zhan', 0xd5b0, 'zhang', 0xd5c1,  'zhao', 0xd5d0,   'zhe', 0xd5da,
      'zhen', 0xd5e4, 'zheng', 0xd5f4,   'zhi', 0xd6a5, 'zhong', 0xd6d0,  'zhou', 0xd6db,
       'zhu', 0xd6e9,  'zhua', 0xd7a5, 'zhuai', 0xd7a7, 'zhuan', 0xd7a8,'zhuang', 0xd7ae,
      'zhui', 0xd7b5,  'zhun', 0xd7bb,  'zhuo', 0xd7bd,    'zi', 0xd7c8,  'zong', 0xd7d7,
       'zou', 0xd7de,    'zu', 0xd7e2,  'zuan', 0xd7ea,   'zui', 0xd7ec,   'zun', 0xd7f0,
       'zuo', 0xd7f2
    ]; // the last range, 'zuo', ends at 0xd7f9
    

    利用输入法的字符表

    要能够处理更多的汉字,可以利用拼音输入法的字符表。输入法是从拼音到汉字,将其反过来使用就可以做到从汉字到拼音。

    从程序角度看,主要是如何节省存储空间和提高查找效率的问题。可以改进的余地应该不会太大。

    拼音转汉字

    最常见的应用其实就是拼音输入法。只是拼音输入法除了简单地解决单个字的问题之外,还需要在词库容量和匹配效率上下更多功夫。

    根据首字母缩写查找

    比如股票软件中根据股票名称首字母缩写查找。

    TODO:查找的方法。

    网络资源

    标签:

      分享到:
      comments powered by Disqus

      29/32ms