#!/usr/bin/env python
# -*- encoding: utf-8 -*-

# Class for handling PO files

import time

def unquote(text):
    r"""Expand backslash escapes in *text* and return the result.

    Recognised escapes are ``\a``, ``\b``, ``\f``, ``\n``, ``\t``, ``\"``,
    ``\\`` and octal sequences made of one or more digits 0-7 (for
    example ``\101`` becomes ``A``).  Any other backslash sequence is left
    untouched.

    The text is scanned once from left to right, so the output of one
    expansion is never re-interpreted as the start of another escape.

    Raises ValueError if an octal escape names a code point that the
    character constructor rejects.
    """
    import re

    simple = {
        'a': '\a',
        'b': '\b',
        'f': '\f',
        'n': '\n',
        't': '\t',
        '"': '"',
        '\\': '\\',
    }

    def expand(match):
        group = match.group(1)
        if group in simple:
            return simple[group]
        # Octal escape.  This used to call string.atoi() although the
        # `string` module was never imported, so it always raised NameError.
        code = int(group, 8)
        if isinstance(text, str):
            return chr(code)
        # Python 2 unicode input: chr() would yield a byte string that cannot
        # be joined with non-ASCII unicode text, so build a unicode character.
        return unichr(code)

    return re.sub(r'\\([abfnt\"\\]|[0-7]+)', expand, text)


def requote(text):
    r"""Return *text* with special characters written as backslash escapes.

    Backslash, double quote, BEL, backspace, form feed, tab and newline
    become ``\\``, ``\"``, ``\a``, ``\b``, ``\f``, ``\t`` and ``\n``
    respectively; every other character is passed through unchanged.
    """
    import re

    # Character -> escaped spelling.
    spelling = {
        '\\': '\\\\',
        '"': '\\"',
        '\a': '\\a',
        '\b': '\\b',
        '\f': '\\f',
        '\t': '\\t',
        '\n': '\\n',
    }

    def escape_char(match):
        return spelling[match.group(0)]

    # Inside a character class \b means backspace, not a word boundary.
    return re.sub(r'[\\"\a\b\f\t\n]', escape_char, text)


def po_unescape(string):
    r"""Decode PO backslash escapes in *string*.

    Handles ``\\``, ``\"``, ``\n``, ``\r``, ``\v``, ``\a``, ``\f``, ``\b``
    and ``\t``; any other backslash sequence is left as it is.

    The string is decoded in a single left-to-right pass.  The previous
    implementation chained str.replace() calls, which re-scanned already
    decoded text: an escaped backslash followed by ``n`` (``\\n``) could
    end up as a newline, and the outcome depended on dict iteration order.
    """
    import re

    # Character following the backslash -> decoded character.
    table = { u"\\" : u"\\",
              u"\"" : u"\"",
              u"n"  : u"\n",
              u"r"  : u"\r",
              u"v"  : u"\v",
              u"a"  : u"\a",
              u"f"  : u"\f",
              u"b"  : u"\b",
              u"t"  : u"\t" }

    def decode(match):
        return table[match.group(1)]

    return re.sub(r'\\([\\"nrvafbt])', decode, string)

def po_escape(string):
    r"""Encode *string* using PO backslash escapes.

    Backslash, double quote, newline, carriage return, vertical tab, BEL,
    form feed, backspace and tab are written as ``\\``, ``\"``, ``\n``,
    ``\r``, ``\v``, ``\a``, ``\f``, ``\b`` and ``\t``.

    The string is encoded in a single pass.  The previous implementation
    chained str.replace() calls in dict iteration order; whenever the
    backslash was not handled first, the backslashes introduced by earlier
    replacements were escaped a second time.
    """
    import re

    # Raw character -> escaped spelling.
    table = { u"\\" : u"\\\\",
              u"\"" : u"\\\"",
              u"\n" : u"\\n",
              u"\r" : u"\\r",
              u"\v" : u"\\v",
              u"\a" : u"\\a",
              u"\f" : u"\\f",
              u"\b" : u"\\b",
              u"\t" : u"\\t" }

    def encode(match):
        return table[match.group(0)]

    # Inside a character class \b means backspace, not a word boundary.
    return re.sub(r'[\\"\n\r\v\a\f\b\t]', encode, string)

# Module-level aliases: the message classes and the parser below call
# escape()/unescape(), which are bound to requote()/unquote().
escape = requote
unescape = unquote

class PoMessage:
    """A single (non-plural) PO entry.

    Holds the msgid, its translation, the "# " comment, the "#."
    translator comment, "#:" source references, "#," flags and any other
    "#<char>" special comments keyed by that character.

    Public attributes:
      plural   -- 0 for this class.
      obsolete -- when true, __repr__ writes msgid/msgstr with a "#~ " prefix.
    """

    def __init__(self, message,
                 translation = None,
                 comment = None,
                 flags = None,
                 sources = None,
                 translator_comment = None):
        """Create a message.

        message            -- the msgid text.
        translation        -- the msgstr text, or None.
        comment            -- "# " comment text; stripped when given.
        flags              -- iterable of flag names, added via set_flag().
        sources            -- iterable of source references; each is stripped.
        translator_comment -- "#." comment text; stripped when given.
        """
        self._message = message
        self._flags = []
        self._sources = []
        self._translation = translation
        # Both comments default to None, so only strip real strings; calling
        # .strip() unconditionally made PoMessage("x") raise AttributeError.
        self._comment = comment
        if comment is not None:
            self._comment = comment.strip()
        self._special_comment = {}
        self._translator_comment = translator_comment
        if translator_comment is not None:
            self._translator_comment = translator_comment.strip()
        self.plural = 0
        self.obsolete = 0

        if flags:
            # FIXME: Add checking of supported flags
            for flag in flags:
                self.set_flag(flag)

        if sources:
            for source in sources:
                self._sources.append(source.strip())

    def is_flagged(self, flag):
        """Return true if *flag* (surrounding whitespace ignored) is set."""
        return flag.strip() in self._flags

    def set_flag(self, flag):
        """Set *flag*, removing its "no-" counterpart if that one is set.

        Setting "fuzzy" clears "no-fuzzy" and setting "no-fuzzy" clears
        "fuzzy".  A flag is stored at most once.
        """
        flag = flag.strip()

        if flag[:3] == "no-":
            negflag = flag[3:]
        else:
            negflag = "no-" + flag

        if negflag in self._flags:
            self._flags.remove(negflag)

        if flag not in self._flags:
            self._flags.append(flag)

    def unset_flag(self, flag):
        """Remove *flag* if it is set; do nothing otherwise."""
        flag = flag.strip()
        if flag in self._flags:
            self._flags.remove(flag)

    def get_comment(self):
        """Return the "# " comment text (may be None)."""
        return self._comment

    def set_comment(self, comment):
        """Replace the "# " comment text; the value is stored unmodified."""
        self._comment = comment

    def unset_comment(self):
        """Clear the "# " comment."""
        self._comment = None


    def get_special_comment(self, key):
        """Return the special comment stored under *key*.

        Raises KeyError if nothing was ever stored under *key*; returns None
        if the comment was cleared with unset_special_comment().
        """
        return self._special_comment[key]

    def set_special_comment(self, key, comment):
        """Store *comment* as the "#<key>" special comment."""
        self._special_comment[key] = comment

    def unset_special_comment(self, key):
        """Clear the special comment under *key* (the key stays, set to None)."""
        self._special_comment[key] = None


    def get_message(self):
        """Return the msgid text."""
        return self._message

    def set_message(self, message):
        """Replace the msgid text."""
        self._message = message

    def unset_message(self):
        """Clear the msgid text."""
        self._message = None


    def get_translation(self):
        """Return the msgstr text (may be None)."""
        return self._translation

    def set_translation(self, translation):
        """Replace the msgstr text."""
        self._translation = translation

    def unset_translation(self):
        """Clear the msgstr text."""
        self._translation = None


    def get_translator_comment(self):
        """Return the "#." comment text (may be None)."""
        return self._translator_comment

    def set_translator_comment(self, translator_comment):
        """Replace the "#." comment text; the value is stored unmodified."""
        self._translator_comment = translator_comment

    def unset_translator_comment(self):
        """Clear the "#." comment."""
        self._translator_comment = None

    def __repr__(self):
        """Return the entry formatted as PO text.

        Emits, in order: comment, special comments, translator comment,
        sources, flags, then msgid and msgstr (each prefixed with "#~ " when
        the entry is obsolete).  A missing message or translation is written
        as an empty string.
        """
        s = u""

        if self._comment:
            com = self._comment.replace("\n", "\n# ")
            s += "# " + com + "\n"

        for char in self._special_comment:
            text = self._special_comment[char]
            if text is None:
                # Cleared by unset_special_comment(); there is nothing to
                # write and calling .replace() on None would raise.
                continue
            com = text.replace("\n", "\n#" + char)
            s += "#" + char + com + "\n"

        if self._translator_comment:
            com = self._translator_comment.replace("\n", "\n#.")
            s += "#. " + com + "\n"

        if len(self._sources):
            s += "#: " + " ".join(self._sources) + "\n"

        if len(self._flags):
            s += "#, " + ", ".join(self._flags) + "\n"

        # escape() only accepts strings; an unset message or translation
        # (None) is written as "".
        message = self._message
        if message is None:
            message = u""
        translation = self._translation
        if translation is None:
            translation = u""

        if self.obsolete:
            s += u"#~ msgid \"%s\"\n" % escape(message)
            s += u"#~ msgstr \"%s\"\n" % escape(translation)
        else:
            s += u"msgid \"%s\"\n" % escape(message)
            s += u"msgstr \"%s\"\n" % escape(translation)
        return s

class PoPluralMessage(PoMessage):
    """A PO entry with plural forms (msgid, msgid_plural, msgstr[N]).

    Translations are kept in a list indexed by plural form number.
    __repr__ writes `forms` msgstr[N] lines regardless of how many
    translations are actually stored.

    Public attributes:
      plural   -- 1 for this class.
      obsolete -- when true, __repr__ writes the entry with "#~ " prefixes.
    """

    def __init__(self, message, plural_message,
                 forms = 2,
                 translations = None,
                 comment = None,
                 flags = None,
                 sources = None,
                 translator_comment = None):
        """Create a plural message.

        message        -- the msgid text.
        plural_message -- the msgid_plural text.
        forms          -- number of msgstr[N] lines __repr__ writes.
        translations   -- iterable of translations, one per plural form; it
                          is copied.  None means no translations yet.
        The remaining arguments are passed on to PoMessage.__init__().
        """
        PoMessage.__init__(self, message, None,
                           comment, flags, sources,
                           translator_comment)
        self._translations = []
        # The default is None, which cannot be iterated; treat it as empty.
        if translations is not None:
            self._translations.extend(translations)
        self._plural_message = plural_message
        self._forms = forms
        self.plural = 1
        self.obsolete = 0

    def get_plural_message(self):
        """Return the msgid_plural text."""
        return self._plural_message

    def set_plural_message(self, message):
        """Replace the msgid_plural text."""
        self._plural_message = message

    def unset_plural_message(self):
        """Clear the msgid_plural text."""
        self._plural_message = None


    def get_translation(self, id):
        """Return the translation for plural form *id*.

        Raises IndexError if no translation is stored at that index.
        """
        return self._translations[id]

    def set_translation(self, id, translation):
        """Store *translation* for plural form *id*.

        The list is padded with empty strings when *id* is past its end.
        """
        missing = id + 1 - len(self._translations)
        if missing > 0:
            self._translations.extend([""] * missing)
        self._translations[id] = translation

    def unset_translation(self, id):
        """Clear the translation for plural form *id* (slot is set to None)."""
        self._translations[id] = None

    def __repr__(self):
        """Return the entry formatted as PO text.

        Emits, in order: comment, special comments, translator comment,
        sources, flags, msgid, msgid_plural and `forms` msgstr[N] lines.
        Missing or cleared texts are written as empty strings.
        """
        s = u""

        if self._comment:
            com = self._comment.replace("\n", "\n# ")
            s += "# " + com + "\n"

        # Special comments were silently dropped for plural entries although
        # PoMessage.__repr__ writes them; emit them the same way here.
        for char in self._special_comment:
            text = self._special_comment[char]
            if text is None:
                continue
            com = text.replace("\n", "\n#" + char)
            s += "#" + char + com + "\n"

        if self._translator_comment:
            com = self._translator_comment.replace("\n", "\n#.")
            s += "#. " + com + "\n"

        if len(self._sources):
            s += "#: " + " ".join(self._sources) + "\n"

        if len(self._flags):
            s += "#, " + ", ".join(self._flags) + "\n"

        # escape() only accepts strings; write unset texts (None) as "".
        message = self._message
        if message is None:
            message = u""
        plural_message = self._plural_message
        if plural_message is None:
            plural_message = u""

        if self.obsolete:
            prefix = u"#~ "
        else:
            prefix = u""

        s += prefix + u"msgid \"%s\"\n" % escape(message)
        s += prefix + u"msgid_plural \"%s\"\n" % escape(plural_message)

        for i in range(self._forms):
            translation = None
            if i < len(self._translations):
                translation = self._translations[i]
            if translation is None:
                # No translation stored for this form, or it was cleared by
                # unset_translation().
                s += prefix + u"msgstr[%d] \"\"\n" % (i)
            else:
                s += prefix + u"msgstr[%d] \"%s\"\n" % (i, escape(translation))

        return s

class PoFile:
    """In-memory representation of a gettext PO file.

    Entries live in `_messages` (key -> message object) and
    `_message_indexes` records the keys in insertion order.  The parser
    keys singular entries by the raw msgid text as it appears in the file
    and plural entries by the tuple (raw msgid, raw msgid_plural).

    `_header` maps header field names to values, `_copyright` holds the
    comment block preceding the header and `_flags` the header's flags.
    """

    def __init__(self, file=None, contents=None):
        """Reads existing PO file, or starts a new one.

        file     -- path of a PO file to read; when given, its contents
                    replace the `contents` argument.
        contents -- PO text to parse.

        With neither argument (or empty contents) a new catalogue with a
        default header is created.
        """
        if file:
            f = open(file, "r")
            try:
                contents = f.read()
            finally:
                # Always release the handle; it used to be left open.
                f.close()

        self._encoding = "UTF-8"

        self._messages = { }
        self._message_indexes = []
        self._header = { }
        self._copyright = ""
        self._flags = []

        if contents:
            self._parse_contents(contents)
        else:
            # Construct a new PO object
            self._header = {
                "Project-Id-Version" : "none",
                "Report-Msgid-Bugs-To" : "",
                "POT-Creation-Date" : time.strftime("%Y-%m-%d %H:%M%z"),
                "Last-Translator" : "",
                "Plural-Forms" : "nplurals=2; plural=n==1;"
                }

    def __iter__(self):
        """Iterate over the message keys in insertion order."""
        return iter(self._message_indexes)

    def next(self):
        # NOTE(review): lists have no next() method, so this raises
        # AttributeError when called.  Iteration goes through __iter__,
        # which returns a list iterator, so this method is not needed for
        # looping; kept unchanged pending a decision on its intended use.
        return self._messages[self._message_indexes.next()]

    def __repr__(self):
        """Return the whole catalogue formatted as PO text.

        Writes the copyright comment, header flags, the header entry (known
        fields first in a fixed order, then any others) and every message.
        As a side effect a leading newline is removed from `_copyright`.
        """
        s = u""

        if self._copyright.startswith("\n"):
            self._copyright = self._copyright[1:]
        for copyline in self._copyright.split("\n"):
            s += u"# %s\n" % (copyline)
        if len(self._flags):
            s += "#, " + ", ".join(self._flags) + "\n"
        s += u"msgid \"\"\nmsgstr \"\"\n"

        header_order = [ "Project-Id-Version", "PO-Revision-Date", "POT-Creation-Date",
                         "Last-Translator", "Language-Team", "MIME-Version", "Content-Type",
                         "Content-Transfer-Encoding", "Plural-Forms" ]
        # Work on a real list so that remove() is available whatever type
        # dict.keys() returns.
        keys = list(self._header.keys())
        for field in header_order:
            if field in keys:
                keys.remove(field)
                s += u"\"%s: %s\\n\"\n" % ( unicode(field).strip(), unicode(self._header[field]).strip() )
        for field in keys:
            s += u"\"%s: %s\\n\"\n" % ( unicode(field).strip(), unicode(self._header[field]).strip() )
        s += "\n"

        for msg in self:
            s += unicode(self._messages[msg]) + "\n"
        return s

    def __getitem__(self, key):
        """Return the message stored under *key*; raises KeyError if absent."""
        return self._messages[key]

    def add_message(self, key, msg):
        """Store *msg* under *key*.

        A new key is appended to the output order.  Re-adding an existing
        key replaces the message in place; previously the key was appended
        again, so the entry was written twice by __repr__.
        """
        if key not in self._messages:
            self._message_indexes.append(key)
        self._messages[key] = msg

    def _parse_contents(self, contents):
        """Parse PO text in *contents* and fill in header and messages.

        The text is processed line by line; an empty line ends the current
        entry.  Raises Exception when a quoted continuation line appears
        where no msgid or msgstr is open, and ValueError when a msgstr[N]
        index is not an integer.
        """
        import re

        # Reset both containers together so the output order never refers
        # to keys that are no longer stored.
        self._messages = {}
        self._message_indexes = []

        if not contents.endswith("\n"):
            contents += "\n"

        # state machine for parsing PO files
        msgid = ""; msgstr = ""; comment = ""; trcomment = ""; plural = ""; othercomments = {}
        in_msgid = in_msgstr = in_msgid_plural = 0
        plural_index = 0          # index of the msgstr[N] being continued
        flags = []
        sources = []
        plurals = []
        obsolete = 0

        lines = contents.split("\n")
        lines.append("\n")        # guarantees a final entry-terminating blank line
        for line in lines:
            line = line.strip()
            if line[0:3] == "#~ ":
                obsolete = 1
                line = line[3:]

            if line == "":
                enc = self._encoding
                if plurals:
                    # Checked first: a plural entry now also has in_msgstr
                    # set (see the msgstr[ branch below).
                    translations = [unescape(unicode(p.decode(enc))) for p in plurals]

                    # NOTE(review): the number of forms is hard-coded to 3
                    # rather than taken from the file; left unchanged.
                    m = PoPluralMessage(unescape(unicode(msgid.decode(enc))),
                                        plural_message = unescape(unicode(plural.decode(enc))),
                                        forms = 3,
                                        translations = translations,
                                        comment = comment.decode(enc),
                                        sources = sources,
                                        flags = flags,
                                        translator_comment = trcomment.decode(enc))
                    m.obsolete = obsolete
                    # Keep special comments for plural entries too; they
                    # used to be discarded.
                    for (ckey, scomment) in othercomments.items():
                        m.set_special_comment(ckey, scomment.decode(enc))
                    self.add_message((msgid, plural), m)
                elif in_msgstr and msgid != "":
                    m = PoMessage(unescape(unicode(msgid.decode(enc))),
                                  translation = unescape(unicode(msgstr.decode(enc))),
                                  comment = comment.decode(enc),
                                  sources = sources,
                                  flags = flags,
                                  translator_comment = trcomment.decode(enc))
                    m.obsolete = obsolete
                    for (ckey, scomment) in othercomments.items():
                        m.set_special_comment(ckey, scomment.decode(enc))
                    self.add_message(msgid, m)
                elif in_msgstr:
                    # PO header
                    self._copyright = comment.decode(enc)
                    self._flags = flags
                    hlines = msgstr.split("\\n")
                    for h in hlines:
                        if h.find(":") != -1:
                            t, v = h.split(":",1)
                            self._header[t.strip()] = v.decode(self._encoding).strip()
                            if t.strip().lower() == "content-type":
                                # The declared charset is used to decode
                                # everything parsed after this point.
                                res = re.search(r"charset=([a-zA-Z0-9_.-]+)", v)
                                if res:
                                    self._encoding = res.group(1)

                msgid = ""; msgstr = ""; comment = ""; trcomment = ""; obsolete = 0
                in_msgid = 0; in_msgstr = 0
                flags = []; sources = []; othercomments = {}
                plural = ""; plurals = []; in_msgid_plural = 0; plural_index = 0

            elif line[0] == "\"" and line[-1] == "\"":
                # Continuation of the most recent msgid/msgid_plural/msgstr.
                if in_msgid:
                    if in_msgid_plural:
                        plural += line[1:-1]
                    else:
                        msgid += line[1:-1]
                elif in_msgstr:
                    if plurals:
                        plurals[plural_index] += line[1:-1]
                    else:
                        msgstr += line[1:-1]
                else:
                    raise Exception("unexpected string continuation line: %r" % (line,))

            elif line[0] == "#":
                if len(line) < 2:
                    comment += "\n"
                    continue
                if line[1] == ".":
                    trcomment += "\n" + line[2:]
                elif line[1] == " ":
                    comment += "\n" + line[2:]
                elif line[1] == ":":
                    sources += line[2:].strip().split()
                elif line[1] == ",":
                    # Strip each flag so the header flags are not written
                    # back with stray spaces; drop empty items.
                    flags = [flag.strip() for flag in line[2:].split(",") if flag.strip()]
                else:
                    ckey = line[1]
                    if ckey not in othercomments:
                        othercomments[ckey] = line[2:]
                    else:
                        othercomments[ckey] += "\n" + line[2:]

            elif line[:12] == "msgid_plural" and in_msgid:
                in_msgid_plural = 1
                plural = line[13:].strip()[1:-1]
            elif line[:5] == "msgid" and not in_msgid:
                in_msgid = 1
                msgid = line[6:].strip()[1:-1]
            elif line[:7] == "msgstr[" and in_msgid_plural:
                pos = line.find("]", 7)
                plural_index = int(line[7:pos])
                # Store by index, padding any gap, so continuation lines can
                # address the right slot even for msgstr[0].
                while len(plurals) <= plural_index:
                    plurals.append("")
                plurals[plural_index] = line[pos+1:].strip()[1:-1]
                # Leave msgid mode: otherwise continuation lines of a plural
                # translation were appended to msgid_plural.
                in_msgid = 0
                in_msgstr = 1
            elif line[:6] == "msgstr" and in_msgid:
                in_msgstr = 1
                in_msgid = 0
                msgstr = line[6:].strip()[1:-1]
            else:
                pass

    def gettext(self, message):
        """Return the entry stored under *message*, or *message* itself.

        Note that a hit returns the stored message object, not a translated
        string.
        """
        return self._messages.get(message, message)

    def ngettext(self, message, plural, number = 0):
        """Return the plural entry for (*message*, *plural*).

        Looks for the (message, plural) tuple key used by the parser, then
        for the legacy "message\\0plural" string key.  When neither exists
        the "\\0"-joined string is returned.  *number* is currently unused.
        """
        pair = (message, plural)
        if pair in self._messages:
            return self._messages[pair]
        key = message + "\0" + plural
        if key in self._messages:
            return self._messages[key]
        return key


    def _default_fuzzy_compare(str1, str2):
        """Default similarity function for merge_translations().

        Deliberately takes no `self`: it is used as a plain function through
        the default value of merge_translations()' `fuzzyfunc` argument.

        Returns (ratio, diff).  When the cheap upper-bound ratio is at most
        0.6 that bound is returned with diff None.  Otherwise the exact ratio
        is returned, and diff is a word-diff style string ("[-old-]{+new+}")
        if the upper bound exceeded 0.8, else None.
        """
        import difflib
        s = difflib.SequenceMatcher(None, str1, str2)
        ratio = s.real_quick_ratio()
        if ratio <= 0.6:
            return (ratio, None)

        diff = None
        if ratio > 0.8: # output diff only for very similar messages
            diff = ''
            for tag, a1, a2, b1, b2 in s.get_opcodes():
                if tag == 'equal':
                    diff += str1[a1:a2]
                elif tag == 'delete':
                    diff += '[-' + str1[a1:a2] + '-]'
                elif tag == 'insert':
                    diff += '{+' + str2[b1:b2] + '+}'
                elif tag == 'replace':
                    diff += '[-' + str1[a1:a2] + '-]{+' + str2[b1:b2] + '+}'
        return (s.ratio(), diff)



    def merge_translations(template, translation, remerge = 0, fuzzyfunc = _default_fuzzy_compare):
        """Merge "translation" with "template", possibly "remerging" all fuzzy entries.

        `template` plays the role of self.  Its header is replaced with the
        translation's header.  For every template key that also exists in
        `translation` the translations are copied over.  Other keys are
        compared against every translation key with `fuzzyfunc`; matches
        with a ratio above 0.6 are copied and, unless the match is exact and
        not already fuzzy, the entry is flagged "fuzzy" and the diff (if any)
        is stored as the "!" special comment.

        NOTE(review): `remerge` is accepted but not used by this code.
        """
        template._header = translation._header
        for i in template:
            lastratio = 0.0
            # Plural keys are tuples; join them for string comparison.
            if isinstance(i, tuple): str1 = "\0".join(i)
            else:                    str1 = i

            if i in translation:
                if template[i].plural:
                    template[i].set_comment(translation[i].get_comment())
                    if translation[i].plural:
                        plural = 0
                        for t in translation[i]._translations:
                            template[i].set_translation(plural, t)
                            plural += 1
                elif not translation[i].plural:
                    template[i].set_translation(translation[i].get_translation())
                continue

            for j in translation:
                if isinstance(j, tuple): str2 = "\0".join(j)
                else:                    str2 = j

                if fuzzyfunc:
                    (ratio, diff) = fuzzyfunc(str2, str1)
                    if ratio > 0.6 and ratio > lastratio:
                        if template[i].plural:
                            template[i].set_comment(translation[j].get_comment())
                            if translation[j].plural:
                                plural = 0
                                for t in translation[j]._translations:
                                    template[i].set_translation(plural, t)
                                    plural += 1
                        elif not translation[j].plural:
                            template[i].set_translation(translation[j].get_translation())
                            template[i].set_comment(translation[j].get_comment())
                        else:
                            ratio = 0.0 # no merging, so below check should fail

                        if ratio > 0.0 and (ratio < 1.0 or translation[j].is_flagged("fuzzy")):
                            lastratio = ratio
                            template[i].set_flag("fuzzy")
                            if diff: template[i].set_special_comment("!", diff)
                


            
if __name__ == "__main__":
    import sys

    # With a path argument, load that PO file; without one, start from a
    # freshly created catalogue.
    arguments = sys.argv[1:]
    if arguments:
        t = PoFile(file=arguments[0])
    else:
        t = PoFile()

    # Write the catalogue to stdout as UTF-8, followed by a newline.
    sys.stdout.write(unicode(t).encode('utf-8') + "\n")
    # Debugging aid: print t._header
