From af8a68953ee4d523f2d898e542a6b36da36d5eb9 Mon Sep 17 00:00:00 2001 From: Fize Jacques Date: Mon, 2 Jul 2018 14:06:27 +0200 Subject: [PATCH 1/4] -Add French support based on https://github.com/sblondon/pluralizefr/blob/master/pluralizefr/__init__.py -Add support for Python_3 (delete unicode() call) --- inflector.py | 5 +- rules/english.py | 2 +- rules/french.py | 120 +++++++++++++++++++++++++++++++++++++++++++++++ rules/spanish.py | 34 +++++++------- 4 files changed, 139 insertions(+), 22 deletions(-) create mode 100644 rules/french.py diff --git a/inflector.py b/inflector.py index c9be1c8..8062718 100644 --- a/inflector.py +++ b/inflector.py @@ -17,9 +17,8 @@ class Inflector: based on naming conventions like on Ruby on Rails. """ - def __init__(self, Inflector=English): - assert callable(Inflector), "Inflector should be a callable obj" - self.Inflector = apply(Inflector) + def __init__(self, Inflector=English()): + self.Inflector = Inflector def pluralize(self, word): '''Pluralizes nouns.''' diff --git a/rules/english.py b/rules/english.py index 42d1b2e..412d9a7 100644 --- a/rules/english.py +++ b/rules/english.py @@ -7,7 +7,7 @@ # (BSD-style). import re -from base import Base +from .base import Base class English (Base): diff --git a/rules/french.py b/rules/french.py new file mode 100644 index 0000000..7471e1b --- /dev/null +++ b/rules/french.py @@ -0,0 +1,120 @@ +#!/usr/bin/env python + +# Copyright (c) 2006 Bermi Ferrer Martinez +# bermi a-t bermilabs - com +# +# See the end of this file for the free software, open source license +# (BSD-style). + +import re +from .base import Base + + +class French (Base): + """ + Inflector for pluralize and singularize English nouns. 
+ + This is the default Inflector for the Inflector obj + """ + + def pluralize(self,word): + for GRAMMAR_RULE in (self._ail_word, self._al_word, self._au_word, self._eil_word, self._eu_word, self._ou_word, self._s_word, self._x_word, self._z_word, + self._default): + plural = GRAMMAR_RULE(word) + if plural: + return plural + + def _ail_word(self,word): + if word.endswith("ail"): + if word == "ail": + return "aulx" + elif word in ( + "bail", "corail", u"émail", "fermail", "soupirail", "travail", "vantail", "ventail", "vitrail"): + return word[:-3] + "aux" + return word + "s" + + def _al_word(self,word): + if word.endswith("al"): + if word in ( + "bal", "carnaval", "chacal", "festival", u"récital", u"régal", + "bancal", "fatal", "fractal", "final", "morfal", "natal", "naval", + u"aéronaval", + u"anténatal", u"néonatal", u"périnatal", u"postnatal", u"prénatal", + "tonal", "atonal", "bitonal", "polytonal", + "corral", "deal", "goal", "autogoal", "revival", "serial", "spiritual", "trial", + "caracal", "chacal", "gavial", "gayal", "narval", "quetzal", "rorqual", "serval", + "metical", "rial", "riyal", "ryal", + "cantal", "emmental", "emmenthal", + u"floréal", "germinal", "prairial", + ): + return word + "s" + return word[:-2] + "aux" + + def _au_word(self,word): + if word.endswith("au"): + if word in ("berimbau", "donau", "karbau", "landau", "pilau", "sarrau", "unau"): + return word + "s" + return word + "x" + + def _eil_word(self,word): + if word.endswith("eil"): + return "vieux" if word == "vieil" else word + "s" + + def _eu_word(self,word): + if word.endswith("eu"): + if word in ("bleu", u"émeu", "enfeu", "pneu", "rebeu"): + return word + "s" + return word + "x" + + def _ou_word(self,word): + if word.endswith("ou"): + if word in ("bijou", "caillou", "chou", "genou", "hibou", "joujou", "pou"): + return word + "x" + return word + "s" + + def _s_word(self,word): + if word[-1] == "s": + return word + + def _x_word(self,word): + if word[-1] == "x": + return word + + def 
_z_word(self,word): + if word[-1] == "z": + return word + + def _default(self,word): + return word + "s" + + def singularize(self, word): + '''Singularizes English nouns.''' + + word=word.lower() + + if word in set(["baux", "coraux", "émaux", "fermaux", "soupiraux", "travaux", "vantaux", "ventaux", "vitraux"]): + return word[:-3] + "ail" + if word.endswith("als") or word.endswith("aux"): + return word[:-3]+"al" + if word.endswith == "vieux": + return "vieil" + if word.endswith("x") or word.endswith("s"): + return word[:-1] + + + +# Copyright (c) 2006 Bermi Ferrer Martinez +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software to deal in this software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of this software, and to permit +# persons to whom this software is furnished to do so, subject to the following +# condition: +# +# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THIS SOFTWARE. diff --git a/rules/spanish.py b/rules/spanish.py index 2c9f283..8335fdc 100644 --- a/rules/spanish.py +++ b/rules/spanish.py @@ -10,7 +10,7 @@ # (BSD-style). 
import re -from base import Base +from .base import Base import utils @@ -90,8 +90,7 @@ def pluralize(self, word): replacement = rule[1] if re.match(u'\|', replacement): for k in range(1, len(groups)): - replacement = replacement.replace(u'|' + unicode( - k), self.string_replace(groups[k - 1], u'ÁÉÍÓÚáéíóú', u'AEIOUaeiou')) + replacement = replacement.replace(u'|' + k, self.string_replace(groups[k - 1], u'ÁÉÍÓÚáéíóú', u'AEIOUaeiou')) result = re.sub(rule[0], replacement, word) # Esto acentúa los sustantivos que al pluralizarse se @@ -117,19 +116,19 @@ def singularize(self, word): word, origType = utils.unicodify(word) # all internal calculations are done in Unicode rules = [ - [ur'(?i)^([bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])es$', u'\\1\\2\\3'], - [ur'(?i)([aeiou])([ns])es$', u'~1\\2'], - [ur'(?i)shes$', u'sh'], # flashes->flash - [ur'(?i)oides$', u'oide'], # androides->androide - [ur'(?i)(sis|tis|xis)$', u'\\1'], # crisis, apendicitis, praxis - [ur'(?i)(é)s$', u'\\1'], # bebés->bebé - [ur'(?i)(ces)$', u'z'], # luces->luz - [ur'(?i)([^e])s$', u'\\1'], # casas->casa - [ur'(?i)([bcdfghjklmnñprstvwxyz]{2,}e)s$', u'\\1'], # cofres->cofre - [ur'(?i)([ghñptv]e)s$', u'\\1'], # llaves->llave, radiocasetes->radiocasete - [ur'(?i)jes$', u'je'], # ejes->eje - [ur'(?i)ques$', u'que'], # tanques->tanque - [ur'(?i)es$', u''] # ELSE remove _es_ monitores->monitor + [r'(?i)^([bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])es$', u'\\1\\2\\3'], + [r'(?i)([aeiou])([ns])es$', u'~1\\2'], + [r'(?i)shes$', u'sh'], # flashes->flash + [r'(?i)oides$', u'oide'], # androides->androide + [r'(?i)(sis|tis|xis)$', u'\\1'], # crisis, apendicitis, praxis + [r'(?i)(é)s$', u'\\1'], # bebés->bebé + [r'(?i)(ces)$', u'z'], # luces->luz + [r'(?i)([^e])s$', u'\\1'], # casas->casa + [r'(?i)([bcdfghjklmnñprstvwxyz]{2,}e)s$', u'\\1'], # cofres->cofre + [r'(?i)([ghñptv]e)s$', u'\\1'], # llaves->llave, radiocasetes->radiocasete + [r'(?i)jes$', u'je'], # ejes->eje + [r'(?i)ques$', u'que'], # tanques->tanque + 
[r'(?i)es$', u''] # ELSE remove _es_ monitores->monitor ] lower_cased_word = word.lower() @@ -151,8 +150,7 @@ def singularize(self, word): replacement = rule[1] if re.match(u'~', replacement): for k in range(1, len(groups)): - replacement = replacement.replace(u'~' + unicode( - k), self.string_replace(groups[k - 1], u'AEIOUaeiou', u'ÁÉÍÓÚáéíóú')) + replacement = replacement.replace(u'~' + k, self.string_replace(groups[k - 1], u'AEIOUaeiou', u'ÁÉÍÓÚáéíóú')) result = re.sub(rule[0], replacement, word) # Esta es una posible solución para el problema de dobles From ffebd6aa23d73fbd52580a07503da5945634a2ed Mon Sep 17 00:00:00 2001 From: Fize Jacques Date: Mon, 2 Jul 2018 14:50:14 +0200 Subject: [PATCH 2/4] UPDATES: * Finalize Python3 support (remove unicode) * Change Readme * Add "setup.py" --- README.markdown => README.md | 97 +++++--- inflector/__init__.py | 7 + inflector.py => inflector/inflector.py | 252 ++++++++++----------- __init__.py => inflector/rules/__init__.py | 0 {rules => inflector/rules}/base.py | 0 {rules => inflector/rules}/english.py | 0 {rules => inflector/rules}/french.py | 0 inflector/rules/spanish.py | 183 +++++++++++++++ rules/__init__.py | 0 rules/spanish.py | 184 --------------- setup.py | 13 ++ tests_es.py | 2 +- utils.py | 44 ---- 13 files changed, 396 insertions(+), 386 deletions(-) rename README.markdown => README.md (56%) create mode 100644 inflector/__init__.py rename inflector.py => inflector/inflector.py (96%) rename __init__.py => inflector/rules/__init__.py (100%) rename {rules => inflector/rules}/base.py (100%) rename {rules => inflector/rules}/english.py (100%) rename {rules => inflector/rules}/french.py (100%) create mode 100644 inflector/rules/spanish.py delete mode 100644 rules/__init__.py delete mode 100644 rules/spanish.py create mode 100644 setup.py delete mode 100644 utils.py diff --git a/README.markdown b/README.md similarity index 56% rename from README.markdown rename to README.md index 580285f..d24a937 100644 --- 
a/README.markdown +++ b/README.md @@ -2,48 +2,85 @@ The Inflector is used for getting the plural and singular form of nouns. This piece of code helps on creating code that favors convention over configuration. -Only English and Spanish nouns are supported. The English version is a port of Ruby on Rails Inflector, while the Spanish Version has been developed from scratch with the help of Carles Sadurní. +Only English, French and Spanish nouns are supported. The English version is a port of Ruby on Rails Inflector, while the Spanish Version has been developed from scratch with the help of Carles Sadurní. +The French version was implemented by [sblondon](https://github.com/sblondon/pluralizefr) -Apart from converting singulars and plurals, this module also handles necessary string conversion for convention based applications like: +Apart from converting singulars and plurals, this module also handles necessary string + conversion for convention based applications like: *tableize*, *urlize*, and so forth. -Available methods are: -## pluralize(word) +## Requirements -Pluralizes nouns. + * Python 3.x -## singularize(word) +## Getting started -Singularizes nouns. +To install the inflector package, move to the *inflector* directory, then run -## conditionalPlural(numer_of_records, word) + $ pip install . + +or if necessary + + $ pip3 install . 
+ +To work with the inflector, import the `Inflector` and the language support modules: + +```{python} +>>> from inflector import Inflector, French, English, Spanish +``` + +Then, to pluralize, run the following code: +```{python} +>>> Inflector(English()).pluralize("matrix") +'matrices' +>>> Inflector(French()).pluralize("cheval") +'chevaux' +>>> Inflector(Spanish()).pluralize("arbol") +'arboles' +``` + +Lastly, if you want to singularize, run: +```{python} +>>> Inflector(English()).singularize("matrices") +'matrix' +>>> Inflector(French()).singularize("bijoux") +'bijou' +>>> Inflector(Spanish()).singularize("Regímenes") +'Régimen' +``` -Returns the plural form of a word if first parameter is greater than 1 -## titleize(word, uppercase = '') +## Methods available + * **pluralize(word)** +Pluralizes nouns. + + * **singularize(word)** +Singularizes nouns. + + * **conditionalPlural(numer_of_records, word)** +Returns the plural form of a word if first parameter is greater than 1 + + * **titleize(word, uppercase = '')** Converts an underscored or CamelCase word into a sentence. The titleize function converts text like "WelcomePage", "welcome_page" or "welcome page" to this "Welcome Page". If the "uppercase" parameter is set to 'first' it will only capitalize the first character of the title. -## camelize(word): - + * **camelize(word):** Returns given word as CamelCased Converts a word like "send_email" to "SendEmail". It will remove non alphanumeric character from the word, so "who's online" will be converted to "WhoSOnline" -## underscore(word) - + * **underscore(word)** Converts a word "into_it_s_underscored_version" Convert any "CamelCased" or "ordinary Word" into an "underscored_word". This can be really useful for creating friendly URLs. 
-## humanize(word, uppercase = '') - + * **humanize(word, uppercase = '')** Returns a human-readable string from word Returns a human-readable string from word, by replacing underscores with a space, and by upper-casing the initial @@ -51,40 +88,38 @@ character by default. If you need to uppercase all the words you just have to pass 'all' as a second parameter. - -## variablize(word) - + * **variablize(word)** Same as camelize but first char is lowercased Converts a word like "send_email" to "sendEmail". It will remove non alphanumeric character from the word, so "who's online" will be converted to "whoSOnline" return self.Inflector.variablize(word) -## tableize(class_name) - + * **tableize(class_name)** Converts a class name to its table name according to rails naming conventions. Example. Converts "Person" to "people" -## classify(table_name) - + * **classify(table_name)** Converts a table name to its class name according to rails naming conventions. Example: Converts "people" to "Person" - -## ordinalize(number) + + * **ordinalize(number)** Converts number to its ordinal form. This method converts 13 to 13th, 2 to 2nd ... -## unaccent(text) - + * **unaccent(text)** Transforms a string to its unaccented version. This might be useful for generating "friendly" URLs -## urlize(text) - + * **urlize(text)** Transform a string its unaccented and underscored version ready to be inserted in friendly URLs -## foreignKey(class_name, separate_class_name_and_id_with_underscore = 1) - + * **foreignKey(class_name, separate_class_name_and_id_with_underscore = 1)** Returns class_name in underscored form, with "_id" tacked on at the end. This is for use in dealing with the database. 
+ + +## Acknowledgement + + diff --git a/inflector/__init__.py b/inflector/__init__.py new file mode 100644 index 0000000..f650bbc --- /dev/null +++ b/inflector/__init__.py @@ -0,0 +1,7 @@ +#coding=utf-8 + +from .rules.english import English +from .rules.french import French +from .rules.spanish import Spanish + +from .inflector import Inflector \ No newline at end of file diff --git a/inflector.py b/inflector/inflector.py similarity index 96% rename from inflector.py rename to inflector/inflector.py index 8062718..643ae5a 100644 --- a/inflector.py +++ b/inflector/inflector.py @@ -1,126 +1,126 @@ -#!/usr/bin/env python - -# Copyright (c) 2006 Bermi Ferrer Martinez -# -# bermi a-t bermilabs - com -# See the end of this file for the free software, open source license -# (BSD-style). - -from rules.english import English - - -class Inflector: - """ - Inflector for pluralizing and singularizing nouns. - - It provides methods for helping on creating programs - based on naming conventions like on Ruby on Rails. - """ - - def __init__(self, Inflector=English()): - self.Inflector = Inflector - - def pluralize(self, word): - '''Pluralizes nouns.''' - return self.Inflector.pluralize(word) - - def singularize(self, word): - '''Singularizes nouns.''' - return self.Inflector.singularize(word) - - def conditionalPlural(self, numer_of_records, word): - '''Returns the plural form of a word if first parameter is greater than 1''' - return self.Inflector.conditionalPlural(numer_of_records, word) - - def titleize(self, word, uppercase=''): - '''Converts an underscored or CamelCase word into a sentence. - The titleize function converts text like "WelcomePage", - "welcome_page" or "welcome page" to this "Welcome Page". 
- If the "uppercase" parameter is set to 'first' it will only - capitalize the first character of the title.''' - return self.Inflector.titleize(word, uppercase) - - def camelize(self, word): - ''' Returns given word as CamelCased - Converts a word like "send_email" to "SendEmail". It - will remove non alphanumeric character from the word, so - "who's online" will be converted to "WhoSOnline"''' - return self.Inflector.camelize(word) - - def underscore(self, word): - ''' Converts a word "into_it_s_underscored_version" - Convert any "CamelCased" or "ordinary Word" into an - "underscored_word". - This can be really useful for creating friendly URLs.''' - return self.Inflector.underscore(word) - - def humanize(self, word, uppercase=''): - '''Returns a human-readable string from word - Returns a human-readable string from word, by replacing - underscores with a space, and by upper-casing the initial - character by default. - If you need to uppercase all the words you just have to - pass 'all' as a second parameter.''' - return self.Inflector.humanize(word, uppercase) - - def variablize(self, word): - '''Same as camelize but first char is lowercased - Converts a word like "send_email" to "sendEmail". It - will remove non alphanumeric character from the word, so - "who's online" will be converted to "whoSOnline"''' - return self.Inflector.variablize(word) - - def tableize(self, class_name): - ''' Converts a class name to its table name according to rails - naming conventions. Example. Converts "Person" to "people" ''' - return self.Inflector.tableize(class_name) - - def classify(self, table_name): - '''Converts a table name to its class name according to rails - naming conventions. Example: Converts "people" to "Person" ''' - return self.Inflector.classify(table_name) - - def ordinalize(self, number): - '''Converts number to its ordinal form. 
- This method converts 13 to 13th, 2 to 2nd ...''' - return self.Inflector.ordinalize(number) - - def unaccent(self, text): - '''Transforms a string to its unaccented version. - This might be useful for generating "friendly" URLs''' - return self.Inflector.unaccent(text) - - def urlize(self, text): - '''Transform a string its unaccented and underscored - version ready to be inserted in friendly URLs''' - return self.Inflector.urlize(text) - - def demodulize(self, module_name): - return self.Inflector.demodulize(module_name) - - def modulize(self, module_description): - return self.Inflector.modulize(module_description) - - def foreignKey(self, class_name, separate_class_name_and_id_with_underscore=1): - ''' Returns class_name in underscored form, with "_id" tacked on at the end. - This is for use in dealing with the database.''' - return self.Inflector.foreignKey(class_name, separate_class_name_and_id_with_underscore) - - - - -# Copyright (c) 2006 Bermi Ferrer Martinez -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software to deal in this software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this software, and to permit -# persons to whom this software is furnished to do so, subject to the following -# condition: -# -# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THIS SOFTWARE. 
+#!/usr/bin/env python + +# Copyright (c) 2006 Bermi Ferrer Martinez +# +# bermi a-t bermilabs - com +# See the end of this file for the free software, open source license +# (BSD-style). + +from .rules.english import English + + +class Inflector: + """ + Inflector for pluralizing and singularizing nouns. + + It provides methods for helping on creating programs + based on naming conventions like on Ruby on Rails. + """ + + def __init__(self, Inflector=English()): + self.Inflector = Inflector + + def pluralize(self, word): + '''Pluralizes nouns.''' + return self.Inflector.pluralize(word) + + def singularize(self, word): + '''Singularizes nouns.''' + return self.Inflector.singularize(word) + + def conditionalPlural(self, numer_of_records, word): + '''Returns the plural form of a word if first parameter is greater than 1''' + return self.Inflector.conditionalPlural(numer_of_records, word) + + def titleize(self, word, uppercase=''): + '''Converts an underscored or CamelCase word into a sentence. + The titleize function converts text like "WelcomePage", + "welcome_page" or "welcome page" to this "Welcome Page". + If the "uppercase" parameter is set to 'first' it will only + capitalize the first character of the title.''' + return self.Inflector.titleize(word, uppercase) + + def camelize(self, word): + ''' Returns given word as CamelCased + Converts a word like "send_email" to "SendEmail". It + will remove non alphanumeric character from the word, so + "who's online" will be converted to "WhoSOnline"''' + return self.Inflector.camelize(word) + + def underscore(self, word): + ''' Converts a word "into_it_s_underscored_version" + Convert any "CamelCased" or "ordinary Word" into an + "underscored_word". 
+ This can be really useful for creating friendly URLs.''' + return self.Inflector.underscore(word) + + def humanize(self, word, uppercase=''): + '''Returns a human-readable string from word + Returns a human-readable string from word, by replacing + underscores with a space, and by upper-casing the initial + character by default. + If you need to uppercase all the words you just have to + pass 'all' as a second parameter.''' + return self.Inflector.humanize(word, uppercase) + + def variablize(self, word): + '''Same as camelize but first char is lowercased + Converts a word like "send_email" to "sendEmail". It + will remove non alphanumeric character from the word, so + "who's online" will be converted to "whoSOnline"''' + return self.Inflector.variablize(word) + + def tableize(self, class_name): + ''' Converts a class name to its table name according to rails + naming conventions. Example. Converts "Person" to "people" ''' + return self.Inflector.tableize(class_name) + + def classify(self, table_name): + '''Converts a table name to its class name according to rails + naming conventions. Example: Converts "people" to "Person" ''' + return self.Inflector.classify(table_name) + + def ordinalize(self, number): + '''Converts number to its ordinal form. + This method converts 13 to 13th, 2 to 2nd ...''' + return self.Inflector.ordinalize(number) + + def unaccent(self, text): + '''Transforms a string to its unaccented version. 
+ This might be useful for generating "friendly" URLs''' + return self.Inflector.unaccent(text) + + def urlize(self, text): + '''Transform a string its unaccented and underscored + version ready to be inserted in friendly URLs''' + return self.Inflector.urlize(text) + + def demodulize(self, module_name): + return self.Inflector.demodulize(module_name) + + def modulize(self, module_description): + return self.Inflector.modulize(module_description) + + def foreignKey(self, class_name, separate_class_name_and_id_with_underscore=1): + ''' Returns class_name in underscored form, with "_id" tacked on at the end. + This is for use in dealing with the database.''' + return self.Inflector.foreignKey(class_name, separate_class_name_and_id_with_underscore) + + + + +# Copyright (c) 2006 Bermi Ferrer Martinez +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software to deal in this software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of this software, and to permit +# persons to whom this software is furnished to do so, subject to the following +# condition: +# +# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THIS SOFTWARE. 
diff --git a/__init__.py b/inflector/rules/__init__.py similarity index 100% rename from __init__.py rename to inflector/rules/__init__.py diff --git a/rules/base.py b/inflector/rules/base.py similarity index 100% rename from rules/base.py rename to inflector/rules/base.py diff --git a/rules/english.py b/inflector/rules/english.py similarity index 100% rename from rules/english.py rename to inflector/rules/english.py diff --git a/rules/french.py b/inflector/rules/french.py similarity index 100% rename from rules/french.py rename to inflector/rules/french.py diff --git a/inflector/rules/spanish.py b/inflector/rules/spanish.py new file mode 100644 index 0000000..2d95fb1 --- /dev/null +++ b/inflector/rules/spanish.py @@ -0,0 +1,183 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# coding=utf-8 +# Copyright (c) 2006 Bermi Ferrer Martinez +# Copyright (c) 2006 Carles Sadurní Anguita +# +# bermi a-t bermilabs - com +# +# See the end of this file for the free software, open source license +# (BSD-style). + +import re +from .base import Base + + +class Spanish (Base): + ''' + Inflector for pluralize and singularize Spanish nouns. + ''' + + irregular_words = { + 'base': 'bases', + 'carácter': 'caracteres', + 'champú': 'champús', + 'curriculum': 'currículos', + 'espécimen': 'especímenes', + 'jersey': 'jerséis', + 'memorándum': 'memorandos', + 'menú': 'menús', + 'no': 'noes', + 'país': 'países', + 'referéndum': 'referendos', + 'régimen': 'regímenes', + 'sándwich': 'sándwiches', + 'si': 'sis', # Nota musical ALERTA: ¡provoca efectos secundarios! + 'taxi': 'taxis', + 'ultimátum': 'ultimatos', + } + + # These words either have the same form in singular and plural, or have no singular form at all + non_changing_words = [ + 'lunes', 'martes', 'miércoles', 'jueves', 'viernes', + 'paraguas', 'tijeras', 'gafas', 'vacaciones', 'víveres', + 'cumpleaños', 'virus', 'atlas', 'sms', 'hummus', + ] + + + def pluralize(self, word): + ''' + Pluralizes Spanish nouns. 
+ Input string can be Unicode (e.g. u"palabra"), or a str encoded in UTF-8 or Latin-1. + Output string will be encoded the same way as the input. + ''' + + #word, origType = utils.unicodify(word) # all internal calculations are done in Unicode + + rules = [ + ['(?i)([aeiou])x$', '\\1x'], + # This could fail if the word is oxytone. + ['(?i)([áéíóú])([ns])$', '|1\\2es'], + ['(?i)(^[bcdfghjklmnñpqrstvwxyz]*)an$', '\\1anes'], # clan->clanes + ['(?i)([áéíóú])s$', '|1ses'], + ['(?i)(^[bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])$', '\\1\\2\\3es'], # tren->trenes + ['(?i)([aeiouáéó])$', '\\1s'], # casa->casas, padre->padres, papá->papás + ['(?i)([aeiou])s$', '\\1s'], # atlas->atlas, virus->virus, etc. + ['(?i)([éí])(s)$', '|1\\2es'], # inglés->ingleses + ['(?i)z$', 'ces'], # luz->luces + ['(?i)([íú])$', '\\1es'], # ceutí->ceutíes, tabú->tabúes + ['(?i)(ng|[wckgtp])$', '\\1s'], # Anglicismos como puenting, frac, crack, show (En que casos podría fallar esto?) + ['(?i)$', 'es'] # ELSE +es (v.g. árbol->árboles) + ] + + lower_cased_word = word.lower() + + for uncountable_word in self.non_changing_words: + if lower_cased_word[-1 * len(uncountable_word):] == uncountable_word: + return word + + for irregular_singular, irregular_plural in self.irregular_words.items(): + match = re.search('(?i)(^' + irregular_singular + ')$', word, re.IGNORECASE) + if match: + result = re.sub('(?i)' + irregular_singular + '$', match.expand('\\1')[0] + irregular_plural[1:], word) + return result + + for rule in rules: + match = re.search(rule[0], word, re.IGNORECASE) + if match: + groups = match.groups() + replacement = rule[1] + if re.match('\|', replacement): + for k in range(1, len(groups)): + replacement = replacement.replace('|' + k, self.string_replace(groups[k - 1], 'ÁÉÍÓÚáéíóú', 'AEIOUaeio')) + + result = re.sub(rule[0], replacement, word) + # Esto acentúa los sustantivos que al pluralizarse se + # convierten en esdrújulos como esmóquines, jóvenes... 
+ match = re.search('(?i)([aeiou]).{1,3}([aeiou])nes$', result) + + if match and len(match.groups()) > 1 and not re.search('(?i)[áéíóú]', word): + result = result.replace(match.group(0), self.string_replace( + match.group(1), 'AEIOUaeio', 'ÁÉÍÓÚáéíóú') + match.group(0)[1:]) + + return result + + return word + + + def singularize(self, word): + ''' + Singularizes Spanish nouns. + Input string can be Unicode (e.g. u"palabras"), or a str encoded in UTF-8 or Latin-1. + Output string will be encoded the same way as the input. + ''' + + # all internal calculations are done in Unicode + + rules = [ + [r'(?i)^([bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])es$', '\\1\\2\\3'], + [r'(?i)([aeiou])([ns])es$', '~1\\2'], + [r'(?i)shes$', 'sh'], # flashes->flash + [r'(?i)oides$', 'oide'], # androides->androide + [r'(?i)(sis|tis|xis)$', '\\1'], # crisis, apendicitis, praxis + [r'(?i)(é)s$', '\\1'], # bebés->bebé + [r'(?i)(ces)$', 'z'], # luces->luz + [r'(?i)([^e])s$', '\\1'], # casas->casa + [r'(?i)([bcdfghjklmnñprstvwxyz]{2,}e)s$', '\\1'], # cofres->cofre + [r'(?i)([ghñptv]e)s$', '\\1'], # llaves->llave, radiocasetes->radiocasete + [r'(?i)jes$', 'je'], # ejes->eje + [r'(?i)ques$', 'que'], # tanques->tanque + [r'(?i)es$', ''] # ELSE remove _es_ monitores->monitor + ] + + lower_cased_word = word.lower() + + for uncountable_word in self.non_changing_words: + if lower_cased_word[-1 * len(uncountable_word):] == uncountable_word: + return word + + for irregular_singular, irregular_plural in self.irregular_words.items(): + match = re.search('(^' + irregular_plural + ')$', word, re.IGNORECASE) + if match: + result = re.sub('(?i)' + irregular_plural + '$', match.expand('\\1')[0] + irregular_singular[1:], word) + return result + + for rule in rules: + match = re.search(rule[0], word, re.IGNORECASE) + if match: + groups = match.groups() + replacement = rule[1] + if re.match('~', replacement): + for k in range(1, len(groups)): + replacement = replacement.replace('~' + k, 
self.string_replace(groups[k - 1], 'AEIOUaeio', 'ÁÉÍÓÚáéíóú')) + + result = re.sub(rule[0], replacement, word) + # Esta es una posible solución para el problema de dobles + # acentos. Un poco guarrillo pero funciona + match = re.search('(?i)([áéíóú]).*([áéíóú])', result) + + if match and len(match.groups()) > 1 and not re.search('(?i)[áéíóú]', word): + result = self.string_replace( + result, 'ÁÉÍÓÚáéíóú', 'AEIOUaeio') + + return result + + return word + + +# Copyright (c) 2006 Bermi Ferrer Martinez +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software to deal in this software without restriction, including +# without limitation the rights to use, copy, modify, merge, publish, +# distribute, sublicense, and/or sell copies of this software, and to permit +# persons to whom this software is furnished to do so, subject to the following +# condition: +# +# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THIS SOFTWARE. + diff --git a/rules/__init__.py b/rules/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/rules/spanish.py b/rules/spanish.py deleted file mode 100644 index 8335fdc..0000000 --- a/rules/spanish.py +++ /dev/null @@ -1,184 +0,0 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# coding=utf-8 -# Copyright (c) 2006 Bermi Ferrer Martinez -# Copyright (c) 2006 Carles Sadurní Anguita -# -# bermi a-t bermilabs - com -# -# See the end of this file for the free software, open source license -# (BSD-style). 
- -import re -from .base import Base -import utils - - -class Spanish (Base): - ''' - Inflector for pluralize and singularize Spanish nouns. - ''' - - irregular_words = { - u'base': u'bases', - u'carácter': u'caracteres', - u'champú': u'champús', - u'curriculum': u'currículos', - u'espécimen': u'especímenes', - u'jersey': u'jerséis', - u'memorándum': u'memorandos', - u'menú': u'menús', - u'no': u'noes', - u'país': u'países', - u'referéndum': u'referendos', - u'régimen': u'regímenes', - u'sándwich': u'sándwiches', - u'si': u'sis', # Nota musical ALERTA: ¡provoca efectos secundarios! - u'taxi': u'taxis', - u'ultimátum': u'ultimatos', - } - - # These words either have the same form in singular and plural, or have no singular form at all - non_changing_words = [ - u'lunes', u'martes', u'miércoles', u'jueves', u'viernes', - u'paraguas', u'tijeras', u'gafas', u'vacaciones', u'víveres', - u'cumpleaños', u'virus', u'atlas', u'sms', u'hummus', - ] - - - def pluralize(self, word): - ''' - Pluralizes Spanish nouns. - Input string can be Unicode (e.g. u"palabra"), or a str encoded in UTF-8 or Latin-1. - Output string will be encoded the same way as the input. - ''' - - word, origType = utils.unicodify(word) # all internal calculations are done in Unicode - - rules = [ - [u'(?i)([aeiou])x$', u'\\1x'], - # This could fail if the word is oxytone. - [u'(?i)([áéíóú])([ns])$', u'|1\\2es'], - [u'(?i)(^[bcdfghjklmnñpqrstvwxyz]*)an$', u'\\1anes'], # clan->clanes - [u'(?i)([áéíóú])s$', u'|1ses'], - [u'(?i)(^[bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])$', u'\\1\\2\\3es'], # tren->trenes - [u'(?i)([aeiouáéó])$', u'\\1s'], # casa->casas, padre->padres, papá->papás - [u'(?i)([aeiou])s$', u'\\1s'], # atlas->atlas, virus->virus, etc. 
- [u'(?i)([éí])(s)$', u'|1\\2es'], # inglés->ingleses - [u'(?i)z$', u'ces'], # luz->luces - [u'(?i)([íú])$', u'\\1es'], # ceutí->ceutíes, tabú->tabúes - [u'(?i)(ng|[wckgtp])$', u'\\1s'], # Anglicismos como puenting, frac, crack, show (En que casos podría fallar esto?) - [u'(?i)$', u'es'] # ELSE +es (v.g. árbol->árboles) - ] - - lower_cased_word = word.lower() - - for uncountable_word in self.non_changing_words: - if lower_cased_word[-1 * len(uncountable_word):] == uncountable_word: - return utils.deunicodify(word, origType) - - for irregular_singular, irregular_plural in self.irregular_words.iteritems(): - match = re.search(u'(?i)(^' + irregular_singular + u')$', word, re.IGNORECASE) - if match: - result = re.sub(u'(?i)' + irregular_singular + u'$', match.expand(u'\\1')[0] + irregular_plural[1:], word) - return utils.deunicodify(result, origType) - - for rule in rules: - match = re.search(rule[0], word, re.IGNORECASE) - if match: - groups = match.groups() - replacement = rule[1] - if re.match(u'\|', replacement): - for k in range(1, len(groups)): - replacement = replacement.replace(u'|' + k, self.string_replace(groups[k - 1], u'ÁÉÍÓÚáéíóú', u'AEIOUaeiou')) - - result = re.sub(rule[0], replacement, word) - # Esto acentúa los sustantivos que al pluralizarse se - # convierten en esdrújulos como esmóquines, jóvenes... - match = re.search(u'(?i)([aeiou]).{1,3}([aeiou])nes$', result) - - if match and len(match.groups()) > 1 and not re.search(u'(?i)[áéíóú]', word): - result = result.replace(match.group(0), self.string_replace( - match.group(1), u'AEIOUaeiou', u'ÁÉÍÓÚáéíóú') + match.group(0)[1:]) - - return utils.deunicodify(result, origType) - - return utils.deunicodify(word, origType) - - - def singularize(self, word): - ''' - Singularizes Spanish nouns. - Input string can be Unicode (e.g. u"palabras"), or a str encoded in UTF-8 or Latin-1. - Output string will be encoded the same way as the input. 
- ''' - - word, origType = utils.unicodify(word) # all internal calculations are done in Unicode - - rules = [ - [r'(?i)^([bcdfghjklmnñpqrstvwxyz]*)([aeiou])([ns])es$', u'\\1\\2\\3'], - [r'(?i)([aeiou])([ns])es$', u'~1\\2'], - [r'(?i)shes$', u'sh'], # flashes->flash - [r'(?i)oides$', u'oide'], # androides->androide - [r'(?i)(sis|tis|xis)$', u'\\1'], # crisis, apendicitis, praxis - [r'(?i)(é)s$', u'\\1'], # bebés->bebé - [r'(?i)(ces)$', u'z'], # luces->luz - [r'(?i)([^e])s$', u'\\1'], # casas->casa - [r'(?i)([bcdfghjklmnñprstvwxyz]{2,}e)s$', u'\\1'], # cofres->cofre - [r'(?i)([ghñptv]e)s$', u'\\1'], # llaves->llave, radiocasetes->radiocasete - [r'(?i)jes$', u'je'], # ejes->eje - [r'(?i)ques$', u'que'], # tanques->tanque - [r'(?i)es$', u''] # ELSE remove _es_ monitores->monitor - ] - - lower_cased_word = word.lower() - - for uncountable_word in self.non_changing_words: - if lower_cased_word[-1 * len(uncountable_word):] == uncountable_word: - return utils.deunicodify(word, origType) - - for irregular_singular, irregular_plural in self.irregular_words.iteritems(): - match = re.search(u'(^' + irregular_plural + u')$', word, re.IGNORECASE) - if match: - result = re.sub(u'(?i)' + irregular_plural + u'$', match.expand(u'\\1')[0] + irregular_singular[1:], word) - return utils.deunicodify(result, origType) - - for rule in rules: - match = re.search(rule[0], word, re.IGNORECASE) - if match: - groups = match.groups() - replacement = rule[1] - if re.match(u'~', replacement): - for k in range(1, len(groups)): - replacement = replacement.replace(u'~' + k, self.string_replace(groups[k - 1], u'AEIOUaeiou', u'ÁÉÍÓÚáéíóú')) - - result = re.sub(rule[0], replacement, word) - # Esta es una posible solución para el problema de dobles - # acentos. 
Un poco guarrillo pero funciona - match = re.search(u'(?i)([áéíóú]).*([áéíóú])', result) - - if match and len(match.groups()) > 1 and not re.search(u'(?i)[áéíóú]', word): - result = self.string_replace( - result, u'ÁÉÍÓÚáéíóú', u'AEIOUaeiou') - - return utils.deunicodify(result, origType) - - return utils.deunicodify(word, origType) - - -# Copyright (c) 2006 Bermi Ferrer Martinez -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software to deal in this software without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this software, and to permit -# persons to whom this software is furnished to do so, subject to the following -# condition: -# -# THIS SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THIS SOFTWARE OR THE USE OR OTHER DEALINGS IN -# THIS SOFTWARE. - diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..44c3b53 --- /dev/null +++ b/setup.py @@ -0,0 +1,13 @@ +# coding = utf-8 + +from setuptools import setup, find_packages + + +setup(name='inflector', + version='0.1a', + description='Inflects English, French and Spanish nouns. 
Similar to Rails inflector but for Python', + url='https://github.com/bermi/Python-Inflector', + author='Bermi Ferrer', + license='BSD', + packages=['inflector'], + zip_safe=False) \ No newline at end of file diff --git a/tests_es.py b/tests_es.py index 6f75030..55cf732 100755 --- a/tests_es.py +++ b/tests_es.py @@ -7,7 +7,7 @@ # import unittest from inflector import Inflector -from rules.spanish import Spanish +from inflector import Spanish class SpanishInflectorTestCase(unittest.TestCase): diff --git a/utils.py b/utils.py deleted file mode 100644 index e4f572e..0000000 --- a/utils.py +++ /dev/null @@ -1,44 +0,0 @@ -__author__ = 'omrio' - -import unicodedata - - -def unicodify(st): - ''' - Convert the given string to normalized Unicode (i.e. combining characters such as accents are combined) - If given arg is not a string, it's returned as is, and origType is 'noConversion'. - @return a tuple with the unicodified string and the original string encoding. - ''' - - # Convert 'st' to Unicode - if isinstance(st, unicode): - origType = 'unicode' - elif isinstance(st, str): - try: - st = st.decode('utf8') - origType = 'utf8' - except UnicodeDecodeError: - try: - st = st.decode('latin1') - origType = 'latin1' - except: - raise UnicodeEncodeError('Given string %s must be either Unicode, UTF-8 or Latin-1' % repr(st)) - else: - origType = 'noConversion' - - # Normalize the Unicode (to combine any combining characters, e.g. 
accents, into the previous letter) - if origType != 'noConversion': - st = unicodedata.normalize('NFKC', st) - - return st, origType - - -def deunicodify(unicodifiedStr, origType): - ''' - Convert the given unicodified string back to its original type and encoding - ''' - - if origType == 'unicode': - return unicodifiedStr - - return unicodifiedStr.encode(origType) From 2cc562ce651fc4622684a242f19803a0a0aa8589 Mon Sep 17 00:00:00 2001 From: Fize Jacques Date: Mon, 2 Jul 2018 14:52:16 +0200 Subject: [PATCH 3/4] Micro Correction --- README.md | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index d24a937..04f96cf 100644 --- a/README.md +++ b/README.md @@ -3,7 +3,7 @@ The Inflector is used for getting the plural and singular form of nouns. This piece of code helps on creating code that favors convention over configuration. Only English, French and Spanish nouns are supported. The English version is a port of Ruby on Rails Inflector, while the Spanish Version has been developed from scratch with the help of Carles Sadurní. -The French version was implement by [sblondon](https://github.com/sblondon/pluralizefr) +The French version was implemented by [sblondon](https://github.com/sblondon/pluralizefr). Apart from converting singulars and plurals, this module also handles necessary string conversion for convention based applications like: *tableize*, *urlize*, and so forth. @@ -35,7 +35,7 @@ Then, to pluralize, run the following code 'matrices' >>> Inflector(French()).pluralize("cheval") 'chevaux' ->>>Inflector(Spanish()).pluralize("arbol") +>>> Inflector(Spanish()).pluralize("arbol") 'arboles' ``` @@ -58,7 +58,7 @@ Pluralizes nouns. * **singularize(word)** Singularizes nouns. 
- * **conditionalPlural(numer_of_records, word) + * **conditionalPlural(numer_of_records, word)** Returns the plural form of a word if first parameter is greater than 1 * **titleize(word, uppercase = '')** @@ -118,8 +118,3 @@ version ready to be inserted in friendly URLs * **foreignKey(class_name, separate_class_name_and_id_with_underscore = 1)** Returns class_name in underscored form, with "_id" tacked on at the end. This is for use in dealing with the database. - - -## Acknowledgement - - From 590af0d2f1b5862ebb5f0c605df492a959d3d054 Mon Sep 17 00:00:00 2001 From: Fize Jacques Date: Mon, 13 May 2019 15:32:49 +0200 Subject: [PATCH 4/4] DEBUG --- .gitignore | 8 +++++++- inflector/rules/french.py | 8 ++++---- setup.py | 2 +- 3 files changed, 12 insertions(+), 6 deletions(-) diff --git a/.gitignore b/.gitignore index 7e99e36..f987842 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,7 @@ -*.pyc \ No newline at end of file +*.pyc +build/ +dist/ +inflector.egg-info.DS_Store +.idea/ +.DS_Store +inflector.egg-info/ diff --git a/inflector/rules/french.py b/inflector/rules/french.py index 7471e1b..8f449e1 100644 --- a/inflector/rules/french.py +++ b/inflector/rules/french.py @@ -12,7 +12,7 @@ class French (Base): """ - Inflector for pluralize and singularize English nouns. + Inflector for pluralize and singularize French nouns. 
This is the default Inflector for the Inflector obj """ @@ -88,18 +88,18 @@ def _default(self,word): return word + "s" def singularize(self, word): - '''Singularizes English nouns.''' + '''Singularizes French nouns.''' word=word.lower() - if word in set(["baux", "coraux", "émaux", "fermaux", "soupiraux", "travaux", "vantaux", "ventaux", "vitraux"]): return word[:-3] + "ail" - if word.endswith("als") or word.endswith("aux"): + if (word.endswith("als") or word.endswith("aux")) and not word.endswith("eaux"): return word[:-3]+"al" if word.endswith == "vieux": return "vieil" if word.endswith("x") or word.endswith("s"): return word[:-1] + return word diff --git a/setup.py b/setup.py index 44c3b53..92d14b6 100644 --- a/setup.py +++ b/setup.py @@ -9,5 +9,5 @@ url='https://github.com/bermi/Python-Inflector', author='Bermi Ferrer', license='BSD', - packages=['inflector'], + packages=['inflector','inflector.rules'], zip_safe=False) \ No newline at end of file