tsroten · TTWNO-zz · May 24, 2016 · May 24, 2016 · May 24, 2016 · May 24, 2016
diff --git a/AUTHORS.rst b/AUTHORS.rst
@@ -10,4 +10,12 @@ Author and Maintainer
 Contributors
 ------------
 
-None yet. Why not be the first?
+* Tait Hoyem <https://github.com/TTWNO> — HTML Formatting
+
+Why not be the second? :-)
+
+Attribution
+------------
+
+* Sun Jianai — FZKai-Extended font [used in pictures]
+* Google — `Source Sans Pro, Normal 400 <https://www.google.com/fonts#QuickUsePlace:quickUse/Family:Source+Sans+Pro>`_ [used for Pinyin font]
diff --git a/CHANGES.rst b/CHANGES.rst
@@ -3,6 +3,11 @@
 Change Log
 ----------
 
+0.3.0 (2016-05-27)
+++++++++++++++++++
+
+* Added HTML Formatting.
+
 0.2.6 (2016-05-23)
 ++++++++++++++++++
 

diff --git a/README.rst b/README.rst
@@ -4,7 +4,7 @@ Dragon Mapper
 
 .. image:: https://badge.fury.io/py/dragonmapper.png
     :target: http://badge.fury.io/py/dragonmapper
-    
+
 .. image:: https://travis-ci.org/tsroten/dragonmapper.png?branch=develop
         :target: https://travis-ci.org/tsroten/dragonmapper
 
@@ -22,27 +22,48 @@ Features
   Phonetic Alphabet.
 * Identify a string as Traditional or Simplified Chinese, Pinyin, Zhuyin, or
   the International Phonetic Alphabet.
+* Output HTML of characters with Pinyin attached to them.
 
 .. code:: python
 
+    >>> from dragonmapper import hanzi
     >>> s = '我是一个美国人。'
-    >>> dragonmapper.hanzi.is_simplified(s)
+    >>> hanzi.is_simplified(s)
     True
-    >>> dragonmapper.hanzi.to_pinyin(s)
+    >>> hanzi.to_pinyin(s)
     'wǒshìyīgèměiguórén。'
-    >>> dragonmapper.hanzi.to_pinyin(s, all_readings=True)
+    >>> hanzi.to_pinyin(s, all_readings=True)
     '[wǒ][shì/shi/tí][yī][gè/ge/gě/gàn][měi][guó][rén/ren]。'
 
 .. code:: python
 
+    >>> from dragonmapper import transcriptions as trans
     >>> s = 'Wǒ shì yīgè měiguórén.'
-    >>> dragonmapper.transcriptions.is_pinyin(s)
+    >>> trans.is_pinyin(s)
     True
-    >>> dragonmapper.transcriptions.pinyin_to_zhuyin(s)
+    >>> trans.pinyin_to_zhuyin(s)
     'ㄨㄛˇ ㄕˋ ㄧ ㄍㄜˋ ㄇㄟˇ ㄍㄨㄛˊ ㄖㄣˊ.'
-    >>> dragonmapper.transcriptions.pinyin_to_ipa(s)
+    >>> trans.pinyin_to_ipa(s)
     'wɔ˧˩˧ ʂɨ˥˩ i˥ kɤ˥˩ meɪ˧˩˧ kwɔ˧˥ ʐən˧˥.'
 
+.. code:: python
+
+    >>> from dragonmapper import transcriptions as trans
+    >>> from dragonmapper import hanzi
+    >>> from dragonmapper import html
+    >>> s = "我是加拿大人"
+    >>> zh = hanzi.to_zhuyin(s)
+    >>> pi = trans.zhuyin_to_pinyin(zh).split(' ')
+    >>> pi
+    ['wǒ', 'shì', 'jiā', 'ná', 'dà', 'rén']
+    >>> h = html.to_html(s, top=pi)
+    >>> print(h)
+
+* The intermediate conversion to Zhuyin is only needed for spacing. You can space out the characters instead.
+* Note: only ``top`` is available right now, as browsers do not currently support placing it elsewhere.
+.. image:: https://s25.postimg.org/4s44wylcv/Screenshot_from_2016_08_03_15_59_03.png
+        :target: https://postimg.org/image/o9yscwiaj/
+
 Getting Started
 ---------------
 * `Install Dragon Mapper <http://dragonmapper.readthedocs.org/en/latest/installation.html>`_

diff --git a/docs/api.rst b/docs/api.rst
@@ -234,3 +234,9 @@ lines of code.
 .. autofunction:: to_zhuyin
 
 .. autofunction:: to_ipa
+
+HTML Conversion:
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Creates HTML from the characters and the transcriptions you supply.
+
+.. TODO: document to_html here via autofunction (currently disabled).
diff --git a/docs/index.rst b/docs/index.rst
@@ -15,22 +15,24 @@ functions for Chinese text processing:
 
 .. code:: python
 
+    >>> from dragonmapper import hanzi
     >>> s = '我是一个美国人。'
-    >>> dragonmapper.hanzi.is_simplified(s)
+    >>> hanzi.is_simplified(s)
     True
-    >>> dragonmapper.hanzi.to_pinyin(s)
+    >>> hanzi.to_pinyin(s)
     'wǒshìyīgèměiguórén。'
-    >>> dragonmapper.hanzi.to_pinyin(s, all_readings=True)
+    >>> hanzi.to_pinyin(s, all_readings=True)
     '[wǒ][shì/shi/tí][yī][gè/ge/gě/gàn][měi][guó][rén/ren]。'
 
 .. code:: python
 
+    >>> from dragonmapper import transcriptions as trans
     >>> s = 'Wǒ shì yīgè měiguórén.'
-    >>> dragonmapper.transcriptions.is_pinyin(s)
+    >>> trans.is_pinyin(s)
     True
-    >>> dragonmapper.transcriptions.pinyin_to_zhuyin(s)
+    >>> trans.pinyin_to_zhuyin(s)
     'ㄨㄛˇ ㄕˋ ㄧ ㄍㄜˋ ㄇㄟˇ ㄍㄨㄛˊ ㄖㄣˊ.'
-    >>> dragonmapper.transcriptions.pinyin_to_ipa(s)
+    >>> trans.pinyin_to_ipa(s)
     'wɔ˧˩˧ ʂɨ˥˩ i˥ kɤ˥˩ meɪ˧˩˧ kwɔ˧˥ ʐən˧˥.'
 
 If this is your first time using Dragon Mapper, check out the :doc:`installation`.

diff --git a/dragonmapper/data/default-style.css b/dragonmapper/data/default-style.css
@@ -0,0 +1,40 @@
+/**
+Default CSS style for dragonmapper
+**/
+
+@import url(https://fonts.googleapis.com/css?family=Source+Sans+Pro);
+
+.hanzi{
+    font-size: 2em;
+    line-height: 1em;
+    text-align: center;
+    vertical-align: middle;
+}
+.punct{
+    font-size: 1.5em;
+    line-height: 1em;
+    text-align: center;
+    vertical-align: middle;
+}
+.zhuyin{
+    font-size: 0.6em;
+    line-height: 1em;
+    text-align: center;
+    vertical-align: middle;
+}
+.pinyin{
+    font-size: 1em;
+    line-height: 1em;
+    /** Some fonts have excess space on accented pinyin characters;
+            setting the font fixes this problem. **/
+    font-family: 'Source Sans Pro', sans-serif;
+    text-align: center;
+    vertical-align: middle;
+}
+.tone-mark{
+    font-size: 1em;
+    text-align: center;
+}
+.unknown{
+    visibility: collapse;
+}
diff --git a/dragonmapper/data/hanzi_pinyin_characters.tsv b/dragonmapper/data/hanzi_pinyin_characters.tsv
@@ -24969,8 +24969,8 @@
 䳠	shuì/zhù
 脽	shuí
 𧀣	shuí
-谁	shuí
-誰	shuí/shéi
+谁	shéi
+誰	shéi/shuí
 鎙	shuò
 硕	shuò
 𠲿	shuò

diff --git a/dragonmapper/data/transcriptions.csv b/dragonmapper/data/transcriptions.csv
@@ -65,6 +65,7 @@ dei,ㄉㄟ,teɪ
 den,ㄉㄣ,tən
 deng,ㄉㄥ,tɤŋ
 di,ㄉㄧ,ti
+dia,ㄉㄧㄚ,tjɑ
 dian,ㄉㄧㄢ,tjɛn
 diang,ㄉㄧㄤ,tjɑŋ
 diao,ㄉㄧㄠ,tjɑʊ
@@ -234,6 +235,7 @@ nun,ㄋㄨㄣ,nwən
 nuo,ㄋㄨㄛ,nwɔ
 nü,ㄋㄩ,ny
 nüe,ㄋㄩㄝ,nɥœ
+o,ㄛ,wɔ
 ou,ㄡ,oʊ
 pa,ㄆㄚ,pʰa
 pai,ㄆㄞ,pʰaɪ
@@ -368,6 +370,7 @@ ye,ㄧㄝ,jɛ
 yi,ㄧ,i
 yin,ㄧㄣ,in
 ying,ㄧㄥ,iŋ
+yo,ㄧㄛ,jʊ
 yong,ㄩㄥ,yʊŋ
 you,ㄧㄡ,yoʊ
 yu,ㄩ,y

diff --git a/dragonmapper/hanzi.py b/dragonmapper/hanzi.py
@@ -34,7 +34,8 @@
 
 
 def _load_data():
-    """Load the word and character mapping data into a dictionary.
+    r"""
+    Load the word and character mapping data into a dictionary.
 
     In the data files, each line is formatted like this:
         HANZI   PINYIN_READING/PINYIN_READING
@@ -58,8 +59,9 @@ def _load_data():
 _WORDS = _HANZI_PINYIN_MAP['words']
 
 
-def _hanzi_to_pinyin(hanzi):
-    """Return the Pinyin reading for a Chinese word.
+def _hanzi_to_pinyin(hanzi, DICT=None):
+    """
+    Return the Pinyin reading for a Chinese word.
 
     If the given string *hanzi* matches a CC-CEDICT word, the return value is
     formatted like this: [WORD_READING1, WORD_READING2, ...]
@@ -71,10 +73,18 @@ def _hanzi_to_pinyin(hanzi):
     original character is returned, e.g. [[CHAR_READING1, ...], CHAR, ...]
 
     """
+    if DICT is None:
+        DICT = _HANZI_PINYIN_MAP
+
     try:
-        return _HANZI_PINYIN_MAP['words'][hanzi]
+        return DICT['words'][hanzi]
     except KeyError:
-        return [_CHARACTERS.get(character, character) for character in hanzi]
+        return [
+            DICT['characters'].get(
+                character,
+                character)
+            for character in hanzi
+        ]
 
 
 def _enclose_readings(container, readings):
@@ -88,7 +98,8 @@ def _enclose_readings(container, readings):
 
 def to_pinyin(s, delimiter=' ', all_readings=False, container='[]',
               accented=True):
-    """Convert a string's Chinese characters to Pinyin readings.
+    """
+    Convert a string's Chinese characters to Pinyin readings.
 
     *s* is a string containing Chinese characters. *accented* is a
     boolean value indicating whether to return accented or numbered Pinyin
@@ -169,7 +180,8 @@ def to_pinyin(s, delimiter=' ', all_readings=False, container='[]',
 
 
 def to_zhuyin(s, delimiter=' ', all_readings=False, container='[]'):
-    """Convert a string's Chinese characters to Zhuyin readings.
+    """
+    Convert a string's Chinese characters to Zhuyin readings.
 
     *s* is a string containing Chinese characters.
 
@@ -192,7 +204,8 @@ def to_zhuyin(s, delimiter=' ', all_readings=False, container='[]'):
 
 
 def to_ipa(s, delimiter=' ', all_readings=False, container='[]'):
-    """Convert a string's Chinese characters to IPA.
+    """
+    Convert a string's Chinese characters to IPA.
 
     *s* is a string containing Chinese characters.
 
@@ -212,3 +225,25 @@ def to_ipa(s, delimiter=' ', all_readings=False, container='[]'):
     numbered_pinyin = to_pinyin(s, delimiter, all_readings, container, False)
     ipa = pinyin_to_ipa(numbered_pinyin)
     return ipa
+
+
+def to_jyutping(s, delimiter=' ', all_readings=False, container='[]'):
+    """
+    Convert a string's Chinese characters to Jyutping (not yet implemented).
+
+    *s* is a string containing Chinese characters.
+
+    *delimiter* is the character used to indicate word boundaries in *s*.
+    This is used to differentiate between words and characters so that a more
+    accurate reading can be returned.
+
+    *all_readings* is a boolean value indicating whether or not to return all
+    possible readings in the case of words/characters that have multiple
+    readings. *container* is a two character string that is used to
+    enclose words/characters if *all_readings* is ``True``. The default
+    ``'[]'`` is used like this: ``'[READING1/READING2]'``.
+
+    Characters not recognized as Chinese are left untouched.
+
+    """
+    pass