[pgAdmin4][Patch] - RM 3780 pgAdmin4 lacks ability to specify NULL values in CSV export

public inbox for [email protected]  
help / color / mirror / Atom feed

From: Akshay Joshi <[email protected]>
To: pgadmin-hackers <[email protected]>
Subject: [pgAdmin4][Patch] - RM 3780 pgAdmin4 lacks ability to specify NULL values in CSV export
Date: Tue, 18 Dec 2018 09:15:30 +0530
Message-ID: <CANxoLDdQr19YWNRgYU+ARs4BgnJGwNBu_51+g0AXWbCC6ZpNNA@mail.gmail.com> (raw)

Hi Hackers,

Attached is the patch to fix RM #3780 ("pgAdmin4 lacks ability to specify
NULL values in CSV export").

Please review it.


-- 
*Akshay Joshi*

*Sr. Software Architect*



*Phone: +91 20-3058-9517 | Mobile: +91 976-788-8246*


Attachments:

  [application/octet-stream] RM_3780.patch (13.9K, 3-RM_3780.patch)
  download | inline diff:
diff --git a/web/pgadmin/utils/csv.py b/web/pgadmin/utils/csv.py
new file mode 100644
index 0000000..2b46478
--- /dev/null
+++ b/web/pgadmin/utils/csv.py
@@ -0,0 +1,414 @@
+# -*- coding: utf-8 -*-
+"""A port of Python 3's csv module to Python 2.
+
+The API of the csv module in Python 2 is drastically different from
+the csv module in Python 3. This is due, for the most part, to the
+difference between str in Python 2 and Python 3.
+
+The semantics of Python 3's version are more useful because they support
+unicode natively, while Python 2's csv does not.
+"""
+from __future__ import unicode_literals, absolute_import
+
+__all__ = ["QUOTE_MINIMAL", "QUOTE_ALL", "QUOTE_NONNUMERIC", "QUOTE_NONE",
+           "Error", "Dialect", "excel", "excel_tab", "writer",
+           "register_dialect", "get_dialect", "DictWriter"]
+
+import re
+import numbers
+from csv import (
+    QUOTE_MINIMAL, QUOTE_ALL, QUOTE_NONNUMERIC, QUOTE_NONE, Error,
+)
+
+# Stuff needed from six
+import sys
+PY3 = sys.version_info[0] == 3
+if PY3:
+    string_types = str
+    text_type = str
+    binary_type = bytes
+    unichr = chr
+else:
+    string_types = basestring
+    text_type = unicode
+    binary_type = str
+
+
+class QuoteStrategy(object):
+    quoting = None
+
+    def __init__(self, dialect):
+        if self.quoting is not None:
+            assert dialect.quoting == self.quoting
+        self.dialect = dialect
+        self.setup()
+
+        escape_pattern_quoted = r'({quotechar})'.format(
+            quotechar=re.escape(self.dialect.quotechar or '"'))
+        escape_pattern_unquoted = r'([{specialchars}])'.format(
+            specialchars=re.escape(self.specialchars))
+
+        self.escape_re_quoted = re.compile(escape_pattern_quoted)
+        self.escape_re_unquoted = re.compile(escape_pattern_unquoted)
+
+    def setup(self):
+        """Optional method for strategy-wide optimizations."""
+
+    def quoted(self, field=None, raw_field=None, only=None):
+        """Determine whether this field should be quoted."""
+        raise NotImplementedError(
+            'quoted must be implemented by a subclass')
+
+    @property
+    def specialchars(self):
+        """The special characters that need to be escaped."""
+        raise NotImplementedError(
+            'specialchars must be implemented by a subclass')
+
+    def escape_re(self, quoted=None):
+        if quoted:
+            return self.escape_re_quoted
+        return self.escape_re_unquoted
+
+    def escapechar(self, quoted=None):
+        if quoted and self.dialect.doublequote:
+            return self.dialect.quotechar
+        return self.dialect.escapechar
+
+    def prepare(self, raw_field, only=None):
+        field = text_type(raw_field if raw_field is not None else '')
+        quoted = self.quoted(field=field, raw_field=raw_field, only=only)
+
+        escape_re = self.escape_re(quoted=quoted)
+        escapechar = self.escapechar(quoted=quoted)
+
+        if escape_re.search(field):
+            escapechar = '\\\\' if escapechar == '\\' else escapechar
+            if not escapechar:
+                raise Error('No escapechar is set')
+            escape_replace = r'{escapechar}\1'.format(escapechar=escapechar)
+            field = escape_re.sub(escape_replace, field)
+
+        if quoted:
+            field = '{quotechar}{field}{quotechar}'.format(
+                quotechar=self.dialect.quotechar, field=field)
+
+        return field
+
+
+class QuoteMinimalStrategy(QuoteStrategy):
+    quoting = QUOTE_MINIMAL
+
+    def setup(self):
+        self.quoted_re = re.compile(r'[{specialchars}]'.format(
+            specialchars=re.escape(self.specialchars)))
+
+    @property
+    def specialchars(self):
+        return (
+            self.dialect.lineterminator +
+            self.dialect.quotechar +
+            self.dialect.delimiter +
+            (self.dialect.escapechar or '')
+        )
+
+    def quoted(self, field, only, **kwargs):
+        if field == self.dialect.quotechar and not self.dialect.doublequote:
+            # If the only character in the field is the quotechar, and
+            # doublequote is false, then just escape without outer quotes.
+            return False
+        return field == '' and only or bool(self.quoted_re.search(field))
+
+
+class QuoteAllStrategy(QuoteStrategy):
+    quoting = QUOTE_ALL
+
+    @property
+    def specialchars(self):
+        return self.dialect.quotechar
+
+    def quoted(self, **kwargs):
+        return True
+
+
+class QuoteNonnumericStrategy(QuoteStrategy):
+    quoting = QUOTE_NONNUMERIC
+
+    @property
+    def specialchars(self):
+        return (
+            self.dialect.lineterminator +
+            self.dialect.quotechar +
+            self.dialect.delimiter +
+            (self.dialect.escapechar or '')
+        )
+
+    def quoted(self, raw_field, **kwargs):
+        if raw_field is None:
+            return False
+        return not isinstance(raw_field, numbers.Number)
+
+
+class QuoteNoneStrategy(QuoteStrategy):
+    quoting = QUOTE_NONE
+
+    @property
+    def specialchars(self):
+        return (
+            self.dialect.lineterminator +
+            (self.dialect.quotechar or '') +
+            self.dialect.delimiter +
+            (self.dialect.escapechar or '')
+        )
+
+    def quoted(self, field, only, **kwargs):
+        if field == '' and only:
+            raise Error('single empty field record must be quoted')
+        return False
+
+
+class writer(object):
+    def __init__(self, fileobj, dialect='excel', **fmtparams):
+        if fileobj is None:
+            raise TypeError('fileobj must be file-like, not None')
+
+        self.fileobj = fileobj
+
+        if isinstance(dialect, text_type):
+            dialect = get_dialect(dialect)
+
+        try:
+            self.dialect = Dialect.combine(dialect, fmtparams)
+        except Error as e:
+            raise TypeError(*e.args)
+
+        strategies = {
+            QUOTE_MINIMAL: QuoteMinimalStrategy,
+            QUOTE_ALL: QuoteAllStrategy,
+            QUOTE_NONNUMERIC: QuoteNonnumericStrategy,
+            QUOTE_NONE: QuoteNoneStrategy,
+        }
+        self.strategy = strategies[self.dialect.quoting](self.dialect)
+
+    def writerow(self, row):
+        if row is None:
+            raise Error('row must be an iterable')
+
+        row = list(row)
+        only = len(row) == 1
+        row = [self.strategy.prepare(field, only=only) for field in row]
+
+        line = self.dialect.delimiter.join(row) + self.dialect.lineterminator
+        return self.fileobj.write(line)
+
+    def writerows(self, rows):
+        for row in rows:
+            self.writerow(row)
+
+
+_dialect_registry = {}
+
+
+def register_dialect(name, dialect='excel', **fmtparams):
+    if not isinstance(name, text_type):
+        raise TypeError('"name" must be a string')
+
+    dialect = Dialect.extend(dialect, fmtparams)
+
+    try:
+        Dialect.validate(dialect)
+    except Exception:
+        raise TypeError('dialect is invalid')
+
+    assert name not in _dialect_registry
+    _dialect_registry[name] = dialect
+
+
+def get_dialect(name):
+    try:
+        return _dialect_registry[name]
+    except KeyError:
+        raise Error('Could not find dialect {0}'.format(name))
+
+
+class Dialect(object):
+    """Describe a CSV dialect.
+    This must be subclassed (see csv.excel).  Valid attributes are:
+    delimiter, quotechar, escapechar, doublequote, skipinitialspace,
+    lineterminator, quoting, strict.
+    """
+    _name = ""
+    _valid = False
+    # placeholders
+    delimiter = None
+    quotechar = None
+    escapechar = None
+    doublequote = None
+    skipinitialspace = None
+    lineterminator = None
+    quoting = None
+    strict = None
+
+    def __init__(self):
+        self.validate(self)
+        if self.__class__ != Dialect:
+            self._valid = True
+
+    @classmethod
+    def validate(cls, dialect):
+        dialect = cls.extend(dialect)
+
+        if not isinstance(dialect.quoting, int):
+            raise Error('"quoting" must be an integer')
+
+        if dialect.delimiter is None:
+            raise Error('delimiter must be set')
+        cls.validate_text(dialect, 'delimiter')
+
+        if dialect.lineterminator is None:
+            raise Error('lineterminator must be set')
+        if not isinstance(dialect.lineterminator, text_type):
+            raise Error('"lineterminator" must be a string')
+
+        if dialect.quoting not in [
+                QUOTE_NONE, QUOTE_MINIMAL, QUOTE_NONNUMERIC, QUOTE_ALL]:
+            raise Error('Invalid quoting specified')
+
+        if dialect.quoting != QUOTE_NONE:
+            if dialect.quotechar is None and dialect.escapechar is None:
+                raise Error('quotechar must be set if quoting enabled')
+            if dialect.quotechar is not None:
+                cls.validate_text(dialect, 'quotechar')
+
+    @staticmethod
+    def validate_text(dialect, attr):
+        val = getattr(dialect, attr)
+        if not isinstance(val, text_type):
+            if type(val) == bytes:
+                raise Error('"{0}" must be string, not bytes'.format(attr))
+            raise Error('"{0}" must be string, not {1}'.format(
+                attr, type(val).__name__))
+
+        if len(val) != 1:
+            raise Error('"{0}" must be a 1-character string'.format(attr))
+
+    @staticmethod
+    def defaults():
+        return {
+            'delimiter': ',',
+            'doublequote': True,
+            'escapechar': None,
+            'lineterminator': '\r\n',
+            'quotechar': '"',
+            'quoting': QUOTE_MINIMAL,
+            'skipinitialspace': False,
+            'strict': False,
+        }
+
+    @classmethod
+    def extend(cls, dialect, fmtparams=None):
+        if isinstance(dialect, string_types):
+            dialect = get_dialect(dialect)
+
+        if fmtparams is None:
+            return dialect
+
+        defaults = cls.defaults()
+
+        if any(param not in defaults for param in fmtparams):
+            raise TypeError('Invalid fmtparam')
+
+        specified = dict(
+            (attr, getattr(dialect, attr, None))
+            for attr in cls.defaults()
+        )
+
+        specified.update(fmtparams)
+        return type(str('ExtendedDialect'), (cls,), specified)
+
+    @classmethod
+    def combine(cls, dialect, fmtparams):
+        """Create a new dialect with defaults and added parameters."""
+        dialect = cls.extend(dialect, fmtparams)
+        defaults = cls.defaults()
+        specified = dict(
+            (attr, getattr(dialect, attr, None))
+            for attr in defaults
+            if getattr(dialect, attr, None) is not None or
+            attr in ['quotechar', 'delimiter', 'lineterminator', 'quoting']
+        )
+
+        defaults.update(specified)
+        dialect = type(str('CombinedDialect'), (cls,), defaults)
+        cls.validate(dialect)
+        return dialect()
+
+    def __delattr__(self, attr):
+        if self._valid:
+            raise AttributeError('dialect is immutable.')
+        super(Dialect, self).__delattr__(attr)
+
+    def __setattr__(self, attr, value):
+        if self._valid:
+            raise AttributeError('dialect is immutable.')
+        super(Dialect, self).__setattr__(attr, value)
+
+
+class excel(Dialect):
+    """Describe the usual properties of Excel-generated CSV files."""
+    delimiter = ','
+    quotechar = '"'
+    doublequote = True
+    skipinitialspace = False
+    lineterminator = '\r\n'
+    quoting = QUOTE_MINIMAL
+
+
+class excel_tab(excel):
+    """Describe the usual properties of Excel-generated TAB-delimited files."""
+    delimiter = '\t'
+
+
+class unix_dialect(Dialect):
+    """Describe the usual properties of Unix-generated CSV files."""
+    delimiter = ','
+    quotechar = '"'
+    doublequote = True
+    skipinitialspace = False
+    lineterminator = '\n'
+    quoting = QUOTE_ALL
+
+
+register_dialect("excel", excel)
+register_dialect("excel-tab", excel_tab)
+register_dialect("unix", unix_dialect)
+
+
+class DictWriter(object):
+    def __init__(self, f, fieldnames, restval="", extrasaction="raise",
+                 dialect="excel", *args, **kwds):
+        self.fieldnames = fieldnames    # list of keys for the dict
+        self.restval = restval          # for writing short dicts
+        if extrasaction.lower() not in ("raise", "ignore"):
+            raise ValueError("extrasaction (%s) must be 'raise' or 'ignore'"
+                             % extrasaction)
+        self.extrasaction = extrasaction
+        self.writer = writer(f, dialect, *args, **kwds)
+
+    def writeheader(self):
+        header = dict(zip(self.fieldnames, self.fieldnames))
+        self.writerow(header)
+
+    def _dict_to_list(self, rowdict):
+        if self.extrasaction == "raise":
+            wrong_fields = [k for k in rowdict if k not in self.fieldnames]
+            if wrong_fields:
+                raise ValueError("dict contains fields not in fieldnames: " +
+                                 ", ".join([repr(x) for x in wrong_fields]))
+        return (rowdict.get(key, self.restval) for key in self.fieldnames)
+
+    def writerow(self, rowdict):
+        return self.writer.writerow(self._dict_to_list(rowdict))
+
+    def writerows(self, rowdicts):
+        return self.writer.writerows(map(self._dict_to_list, rowdicts))
diff --git a/web/pgadmin/utils/driver/psycopg2/connection.py b/web/pgadmin/utils/driver/psycopg2/connection.py
index 2c9fcb5..d31df4a 100644
--- a/web/pgadmin/utils/driver/psycopg2/connection.py
+++ b/web/pgadmin/utils/driver/psycopg2/connection.py
@@ -37,16 +37,13 @@ from .typecast import register_global_typecasters, \
     register_string_typecasters, register_binary_typecasters, \
     register_array_to_string_typecasters, ALL_JSON_TYPES
 from .encoding import getEncoding
+from pgadmin.utils import csv
 
 if sys.version_info < (3,):
-    # Python2 in-built csv module do not handle unicode
-    # backports.csv module ported from PY3 csv module for unicode handling
-    from backports import csv
     from StringIO import StringIO
     IS_PY2 = True
 else:
     from io import StringIO
-    import csv
     IS_PY2 = False
 
 _ = gettext

view thread (20+ messages)  latest in thread

reply

Reply instructions:

You may reply publicly to this message via plain-text email
using any one of the following methods:

* Reply to all the recipients using the --to and --cc options:
  reply via email

  To: [email protected]
  Cc: [email protected]
  Subject: Re: [pgAdmin4][Patch] - RM 3780 pgAdmin4 lacks ability to specify NULL values in CSV export
  In-Reply-To: <CANxoLDdQr19YWNRgYU+ARs4BgnJGwNBu_51+g0AXWbCC6ZpNNA@mail.gmail.com>

* Save the following mbox file, import it into your mail client,
  and reply-to-all from there: mbox

This inbox is served by agora; see mirroring instructions
for how to clone and mirror all data and code used for this inbox