diff --git a/README.md b/README.md
index d1b360b..1977d04 100644
--- a/README.md
+++ b/README.md
@@ -1,30 +1,99 @@
-# python-docx2txt #
+# python-docx2txt
 
-A pure python-based utility to extract text from docx files. 
+## Introduction
 
-The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx). It can however also extract text from header, footer and hyperlinks. __It can now also extract images.__ 
+A pure Python-based utility to extract text from docx files.
 
-## How to install? ##
-```bash
+The code is taken and adapted from [python-docx](https://github.com/python-openxml/python-docx).
+It can, however, also extract text from headers, footers and hyperlinks.
+__It can now also extract images and properties.__
+
+It can be used as a [Python library](#python-library)
+or from the [command line](#command-line-utility).
+
+## Python Library
+
+### Library Installation
+
+```sh
 pip install docx2txt
 ```
 
-## How to run? ##
+### Library Usage
 
-a. From command line:
-```bash
-# extract text
-docx2txt file.docx
-# extract text and images
-docx2txt -i /tmp/img_dir file.docx
+#### Procedural
+
+The library is easy to use procedurally.
+
+```py
+>>> import docx2txt
+>>> # get document text
+>>> docx2txt.process('file.docx')
+'header_textmain_textfooter_text'
+>>> # or
+>>> # get document text, extract images to /tmp/img_dir
+>>> docx2txt.process('file.docx', img_dir='/tmp/img_dir/')
+'header_textmain_textfooter_text'
+```
+
+#### Object Oriented
+
+The DocxFile class provides more granularity.
+Its argument list and accompanying behaviors are identical to `process()`.
+Document properties are stored as a dictionary.
+No keys are guaranteed, so the get() method is recommended.
+
+```py
+>>> import docx2txt
+>>> # parse Word doc
+>>> document = docx2txt.DocxFile('file.docx', img_dir='/tmp/img_dir/')
+>>> # path to file
+>>> document.path
+'/absolute/path/to/file.docx'
+>>> # all document text
+>>> document.text
+'header_textmain_textfooter_text'
+>>> # image directory
+>>> document.img_dir
+'/tmp/img_dir/'
+>>> # text components
+>>> '||'.join([document.header, document.main, document.footer])
+'header_text||main_text||footer_text'
+>>> # images (filename only if not extracted)
+>>> document.images
+['/tmp/img_dir/image1.jpg', '/tmp/img_dir/image2.jpg']
+>>> # document properties
+>>> document.properties
+{'property_name': 'property value', ...}
+>>> document.properties['title']
+'title_text'
+>>> document.properties['nonexistent']
+KeyError
+>>> print(document.properties.get('nonexistent'))
+None
 ```
-b. From python:
-```python
-import docx2txt
 
-# extract text
-text = docx2txt.process("file.docx")
+## Command Line Utility
+
+### Utility Installation
+
+From the directory containing this README file:
 
-# extract text and write images in /tmp/img_dir
-text = docx2txt.process("file.docx", "/tmp/img_dir") 
+```sh
+python setup.py install
 ```
+
+### Utility Usage
+
+```sh
+# simple text extraction
+docx2txt file.docx
+# get text, extract images to /tmp/img_dir
+docx2txt -i /tmp/img_dir file.docx
+# get all document data
+docx2txt -d file.docx
+# get all data, extract images to /tmp/img_dir
+docx2txt -d -i /tmp/img_dir file.docx
+# same as previous, more simply:
+docx2txt -di /tmp/img_dir file.docx
+```
\ No newline at end of file
diff --git a/bin/docx2txt b/bin/docx2txt
index 62157c2..992dffc 100755
--- a/bin/docx2txt
+++ b/bin/docx2txt
@@ -4,6 +4,5 @@ import docx2txt
 
 if __name__ == '__main__':
     import sys
-    args = docx2txt.process_args()
-    text = docx2txt.process(args.docx, args.img_dir)
-    sys.stdout.write(text.encode('utf-8'))
+    for line in docx2txt.get_output():
+        sys.stdout.write(line)
diff --git a/docx2txt/__init__.py b/docx2txt/__init__.py
index 9f51a73..031c20a 100644
--- a/docx2txt/__init__.py
+++ b/docx2txt/__init__.py
@@ -1,4 +1,4 @@
-from .docx2txt import process
-from .docx2txt import process_args
+from .docx2txt import get_output, process  # noqa
+from .docx_file import DocxFile            # noqa
 
-VERSION = '0.7'
+VERSION = '0.8'
diff --git a/docx2txt/dict_util.py b/docx2txt/dict_util.py
new file mode 100644
index 0000000..4616fc1
--- /dev/null
+++ b/docx2txt/dict_util.py
@@ -0,0 +1,18 @@
+"""Dictionary Utilities"""
+
+
+def merge(dicts):
+    # type: (list) -> dict
+    merged = {}  # type: dict
+    for d in dicts:
+        merged.update(d)
+
+    return merged
+
+
+def filter_key(src_dict, key_test, getter=dict.values):
+    return getter({
+        key: val
+        for key, val
+        in src_dict.items()
+        if key_test(key)})
diff --git a/docx2txt/docx2txt.py b/docx2txt/docx2txt.py
index 0dac072..596f2df 100755
--- a/docx2txt/docx2txt.py
+++ b/docx2txt/docx2txt.py
@@ -1,28 +1,31 @@
 #! /usr/bin/env python
 
 import argparse
-import re
-import xml.etree.ElementTree as ET
-import zipfile
 import os
 import sys
 
-
-nsmap = {'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
+from . import docx_file
 
 
 def process_args():
-    parser = argparse.ArgumentParser(description='A pure python-based utility '
-                                                 'to extract text and images '
-                                                 'from docx files.')
-    parser.add_argument("docx", help="path of the docx file")
-    parser.add_argument('-i', '--img_dir', help='path of directory '
-                                                'to extract images')
+    """Parse command line arguments if invoked directly
+
+    Returns:
+        object -- .img_dir: output directory, .details: get document details
+    """
+    desc = 'A pure Python-based utility to extract data from docx files.'
+    id_help = 'path of directory to extract images'
+    ad_help = 'get all document data'
+
+    parser = argparse.ArgumentParser(description=desc)
+    parser.add_argument('docx', help='path of the docx file')
+    parser.add_argument('-i', '--img_dir', help=id_help)
+    parser.add_argument('-d', '--details', help=ad_help, action='store_true')
 
     args = parser.parse_args()
 
     if not os.path.exists(args.docx):
-        print('File {} does not exist.'.format(args.docx))
+        sys.stderr.write('File {!r} does not exist.'.format(args.docx))
         sys.exit(1)
 
     if args.img_dir is not None:
@@ -30,84 +33,36 @@ def process_args():
             try:
                 os.makedirs(args.img_dir)
             except OSError:
-                print("Unable to create img_dir {}".format(args.img_dir))
+                sys.stderr.write(
+                    'Unable to create img_dir {!r}'.format(args.img_dir))
                 sys.exit(1)
     return args
 
 
-def qn(tag):
-    """
-    Stands for 'qualified name', a utility function to turn a namespace
-    prefixed tag name into a Clark-notation qualified tag name for lxml. For
-    example, ``qn('p:cSld')`` returns ``'{http://schemas.../main}cSld'``.
-    Source: https://github.com/python-openxml/python-docx/
-    """
-    prefix, tagroot = tag.split(':')
-    uri = nsmap[prefix]
-    return '{{{}}}{}'.format(uri, tagroot)
+def process(docx, img_dir=None):
+    """Return text of ``docx``; extract images if ``img_dir`` is given."""
+    return docx_file.DocxFile(docx, img_dir).text
 
 
-def xml2text(xml):
-    """
-    A string representing the textual content of this run, with content
-    child elements like ``<w:tab/>`` translated to their Python
-    equivalent.
-    Adapted from: https://github.com/python-openxml/python-docx/
-    """
-    text = u''
-    root = ET.fromstring(xml)
-    for child in root.iter():
-        if child.tag == qn('w:t'):
-            t_text = child.text
-            text += t_text if t_text is not None else ''
-        elif child.tag == qn('w:tab'):
-            text += '\t'
-        elif child.tag in (qn('w:br'), qn('w:cr')):
-            text += '\n'
-        elif child.tag == qn("w:p"):
-            text += '\n\n'
-    return text
+def detail_text(prop_name, prop_val):
+    return '{:10s}: {!r}\n'.format(prop_name, prop_val)
 
 
-def process(docx, img_dir=None):
-    text = u''
-
-    # unzip the docx in memory
-    zipf = zipfile.ZipFile(docx)
-    filelist = zipf.namelist()
-
-    # get header text
-    # there can be 3 header files in the zip
-    header_xmls = 'word/header[0-9]*.xml'
-    for fname in filelist:
-        if re.match(header_xmls, fname):
-            text += xml2text(zipf.read(fname))
-
-    # get main text
-    doc_xml = 'word/document.xml'
-    text += xml2text(zipf.read(doc_xml))
-
-    # get footer text
-    # there can be 3 footer files in the zip
-    footer_xmls = 'word/footer[0-9]*.xml'
-    for fname in filelist:
-        if re.match(footer_xmls, fname):
-            text += xml2text(zipf.read(fname))
-
-    if img_dir is not None:
-        # extract images
-        for fname in filelist:
-            _, extension = os.path.splitext(fname)
-            if extension in [".jpg", ".jpeg", ".png", ".bmp"]:
-                dst_fname = os.path.join(img_dir, os.path.basename(fname))
-                with open(dst_fname, "wb") as dst_f:
-                    dst_f.write(zipf.read(fname))
-
-    zipf.close()
-    return text.strip()
+def get_output():
+    """Yield the output chunks written by the command line utility."""
+    args = process_args()
+    document = docx_file.DocxFile(args.docx, args.img_dir)
+    if args.details:
+        yield detail_text('path', document.path)
+        yield detail_text('header', document.header)
+        yield detail_text('main', document.main)
+        yield detail_text('footer', document.footer)
+        yield detail_text('images', document.images)
+        yield detail_text('properties', document.properties)
+    else:
+        yield document.text
 
 
 if __name__ == '__main__':
-    args = process_args()
-    text = process(args.docx, args.img_dir)
-    sys.stdout.write(text.encode('utf-8'))
+    for line in get_output():
+        sys.stdout.write(line)
diff --git a/docx2txt/docx_file.py b/docx2txt/docx_file.py
new file mode 100644
index 0000000..0664a1e
--- /dev/null
+++ b/docx2txt/docx_file.py
@@ -0,0 +1,365 @@
+import errno
+import os.path as os_path
+import sys
+from os import makedirs
+from zipfile import ZipFile
+
+from . import dict_util, xml_util
+
+
+def simplify_rel(attribs):
+    # type: (dict) -> str
+    """Simplify Type of a Relationship node
+
+    Arguments:
+        attribs {dict} -- attributes of Relationship node
+
+    Returns:
+        str - salient portion of rel type (officeDocument, image, etc.)
+    """
+    attr = attribs.get('Type', '')
+
+    return os_path.basename(attr)
+
+
+def locate_rel(attribs):
+    # type: (dict) -> str
+    """Get path to file in Relationship node
+
+    Arguments:
+        attribs {dict} -- attributes of Relationship node
+
+    Returns:
+        str -- path of Target within package
+    """
+    attr = attribs.get('Target', '')
+
+    return attr.lstrip('/')
+
+
+def get_package_rels(pkg_xml):
+    # type: (bytes) -> dict
+    """Get package relationships
+
+    Arguments:
+        pkg_xml {bytes} -- top level relationships XML (_rels/.rels)
+
+    Returns:
+        dict -- property and document paths
+    """
+    rels = xml_util.parse(pkg_xml)
+
+    return {
+        simplify_rel(rel.attrib): locate_rel(rel.attrib)
+        for rel
+        in rels.iter()}
+
+
+def parse_properties(prop_xml):
+    # type: (bytes) -> dict
+    """Parse XML for document metadata
+
+    Arguments:
+        prop_xml {bytes} -- property XML file (docProps XML)
+
+    Returns:
+        dict -- document metadata
+    """
+    props = xml_util.parse(prop_xml)
+
+    return {xml_util.unquote(prop.tag): prop.text for prop in props.iter()}
+
+
+def is_property_rel(rel_type):
+    # type: (str) -> bool
+    """Test for string indicating a property relationship
+
+    Arguments:
+        rel_type {str} -- relationship type
+
+    Returns:
+        bool -- relationship is a property
+    """
+    return rel_type.endswith('-properties')
+
+
+def get_package_props(pkg, pkg_rels):
+    # type: (ZipFile, dict) -> dict
+    """Get all properties of package
+
+    Arguments:
+        pkg {ZipFile} -- package as ZipFile
+        pkg_rels {dict} -- all package relationships
+
+    Returns:
+        dict -- properties of package
+    """
+    prop_dicts = [
+        parse_properties(pkg.read(path))
+        for path
+        in dict_util.filter_key(pkg_rels, is_property_rel)]
+
+    return dict_util.merge(prop_dicts)
+
+
+def get_part_rels_path(part_path):
+    # type: (str) -> str
+    """Get path to document relationships
+
+    Arguments:
+        part_path {str} -- path to officeDocument relationship
+
+    Returns:
+        str -- path to relationships for ``part_path``
+    """
+    path_comps = [
+        os_path.dirname(part_path).lstrip('/'),
+        '_rels',
+        os_path.basename(part_path) + '.rels']
+
+    return '/'.join(path_comps)
+
+
+def get_part_rels(pkg, part_key, part_path):
+    # type: (ZipFile, str, str) -> dict
+    """Parse relationships of part (document)
+
+    Arguments:
+        pkg {zipfile.ZipFile} -- package ZipFile
+        part_key {str} -- key to store path of officeDocument part
+        part_path {str} -- path to officeDocument part in package
+
+    Returns:
+        dict -- dictionary of XML data
+    """
+    base_path = os_path.dirname(part_path).lstrip('/')
+    rels_path = get_part_rels_path(part_path)
+    rel_nodes = xml_util.parse(pkg.read(rels_path))
+
+    rels = {}  # type: dict
+    for rel_node in rel_nodes.iter():
+        key = simplify_rel(rel_node.attrib)
+        path = '/'.join([base_path, rel_node.attrib.get('Target', '')])
+
+        rels[key] = rels.get(key, []) + [path]
+
+    rels.update({part_key: [part_path]})
+
+    return rels
+
+
+def get_all_rels(pkg, part_type):
+    # type: (ZipFile, str) -> tuple
+    """Get relationships for package and part of ``part_type``
+
+    Arguments:
+        pkg {ZipFile} -- package as ZipFile
+        part_type {str} -- type of 'part' to locate (officeDocument)
+
+    Returns:
+        tuple -- package relationships, part relationships
+    """
+    pkg_rels = get_package_rels(pkg.read('_rels/.rels'))
+    part_path = pkg_rels.get(part_type, 'word/document.xml')
+    part_rels = get_part_rels(pkg, part_type, part_path)
+
+    return pkg_rels, part_rels
+
+
+def mkdir_p(path):
+    # type: (str) -> None
+    """Recursively create directory at ``path``
+
+    Arguments:
+        path {str} -- directory to create
+    """
+    try:
+        makedirs(path)
+    except OSError as err:
+        if err.errno != errno.EEXIST or not os_path.isdir(path):
+            raise
+
+
+def extract_image(img_bytes, img_dir, fname):
+    # type: (bytes, str, str) -> str
+    """Write image data to disk
+
+    Arguments:
+        img_bytes {bytes} -- image data
+        img_dir {str} -- output directory
+        fname {str} -- name of source file
+
+    Returns:
+        str -- absolute path to extracted image
+    """
+    dst_fname = os_path.join(img_dir, os_path.basename(fname))
+
+    with open(dst_fname, 'wb') as dst_f:
+        dst_f.write(img_bytes)
+
+    return os_path.abspath(dst_fname)
+
+
+def xml2text(xml):
+    """
+    A string representing the textual content of this run, with content
+    child elements like ``<w:tab/>`` translated to their Python
+    equivalent.
+    Adapted from: https://github.com/python-openxml/python-docx/
+    """
+    text = u''
+    root = xml_util.parse(xml)
+    whitespace_tags = {
+        xml_util.quote('w:tab'): '\t',
+        xml_util.quote('w:br'): '\n',
+        xml_util.quote('w:cr'): '\n',
+        xml_util.quote('w:p'): '\n\n', }
+    text_tag = xml_util.quote('w:t')
+    for child in root.iter():
+        text += whitespace_tags.get(child.tag, '')
+        if child.tag == text_tag and child.text is not None:
+            text += child.text
+    return text
+
+
+def xml2dict(xml):
+    # type: (bytes) -> dict
+    """Get dictionary of values from ``xml``
+
+    Arguments:
+        xml {bytes} -- contents of XML file
+
+    Returns:
+        dict -- dictionary of {node.tagName: node.text}
+    """
+    root = xml_util.parse(xml)
+    data = {
+        xml_util.unquote(child.tag): child.text
+        for child in root.iter()}
+    return data
+
+
+def read_docx(path, img_dir):
+    # type: (str, str) -> dict
+    """Load and parse contents of file at ``path``
+
+    Arguments:
+        path {str} -- path to DOCX file
+
+    Keyword Arguments:
+        img_dir {str} -- save images in specified directory
+
+    Returns:
+        dict -- header, main, footer, images, and properties of DOCX file
+    """
+    HEAD_KEY = 'header'
+    MAIN_KEY = 'officeDocument'
+    FOOT_KEY = 'footer'
+    IMG_KEY = 'image'
+
+    with ZipFile(path) as pkg:
+        pkg_rels, doc_rels = get_all_rels(pkg, MAIN_KEY)
+
+        text = {
+            key: ''.join([
+                xml2text(pkg.read(fname))
+                for fname in doc_rels.get(key, [])])
+            for key in [HEAD_KEY, MAIN_KEY, FOOT_KEY]}  # type: dict
+
+        images = []  # type: list
+        if img_dir is None:
+            images += [
+                os_path.basename(fname)
+                for fname in doc_rels.get(IMG_KEY, [])]
+        else:
+            mkdir_p(img_dir)
+            images += [
+                extract_image(pkg.read(fname), img_dir, fname)
+                for fname in doc_rels.get(IMG_KEY, [])]
+
+        props = get_package_props(pkg, pkg_rels)
+
+    return {
+        'header': text.get(HEAD_KEY),
+        'main': text.get(MAIN_KEY),
+        'footer': text.get(FOOT_KEY),
+        'images': images,
+        'properties': props, }
+
+
+def get_path(path):
+    # type: (object) -> str
+    """Get absolute path to document
+
+    Arguments:
+        path {object} -- path to DOCX file, or file-like object
+
+    Returns:
+        str -- absolute path; else the object's name or url; else ''
+    """
+    # filesystem path: abspath() rejects file objects, str() never would
+    try:
+        return os_path.abspath(path)
+    except (TypeError, AttributeError):
+        pass
+
+    # TextIOWrapper, addinfourl, HTTPResponse... and more?
+    for attr in (getattr(path, key, None) for key in ('name', 'url')):
+        if attr is not None:
+            return str(attr)
+
+    return ''
+
+
+class DocxFile(object):
+    """Parsed DOCX file: text parts, image paths and properties."""
+
+    def __init__(self, file, img_dir=None):
+        doc_data = read_docx(file, img_dir)
+        self._path = get_path(file)                # type: str
+        self._img_dir = img_dir                    # type: str
+        self._header = doc_data['header']          # type: str
+        self._main = doc_data['main']              # type: str
+        self._footer = doc_data['footer']          # type: str
+        self._images = doc_data['images']          # type: list
+        self._properties = doc_data['properties']  # type: dict
+
+    def __str__(self):
+        # all document text in order: header, main, footer (bytes on py2)
+        text = self._header + self._main + self._footer
+        return text.encode('utf-8') if sys.version_info[0] < 3 else text
+
+    def __repr__(self):
+        return 'DocxFile({!r}, {!r})'.format(self._path, self._img_dir)
+
+    @property
+    def path(self):
+        return self._path
+
+    @property
+    def img_dir(self):
+        return self._img_dir
+
+    @property
+    def header(self):
+        return self._header
+
+    @property
+    def main(self):
+        return self._main
+
+    @property
+    def footer(self):
+        return self._footer
+
+    @property
+    def images(self):
+        return self._images
+
+    @property
+    def properties(self):
+        return self._properties
+
+    @property
+    def text(self):
+        return str(self).strip()
diff --git a/docx2txt/xml_util.py b/docx2txt/xml_util.py
new file mode 100644
index 0000000..7b4d061
--- /dev/null
+++ b/docx2txt/xml_util.py
@@ -0,0 +1,37 @@
+"""XML Utilities"""
+
+import xml.etree.ElementTree as ET
+
+
+def quote(tag):
+    """
+    Turn a namespace-prefixed tag name into a Clark-notation qualified
+    tag name for ElementTree. Only the ``w`` prefix is mapped. For
+    example, ``quote('w:t')`` returns ``'{http://schemas.../main}t'``.
+    Source: https://github.com/python-openxml/python-docx/
+    """
+    nsmap = {
+        'w': 'http://schemas.openxmlformats.org/wordprocessingml/2006/main'}
+    prefix, tagroot = tag.split(':')
+    uri = nsmap[prefix]
+    return '{{{}}}{}'.format(uri, tagroot)
+
+
+def unquote(tag):
+    # type: (str) -> str
+    """Remove namespace from prefixed tag.
+
+    See: [Python issue 18304](https://bugs.python.org/issue18304)
+
+    Arguments:
+        tag {str} -- (possibly-)namespaced tag
+
+    Returns:
+        str -- tag name without namespace
+    """
+    return tag.split('}').pop()
+
+
+def parse(xml_bytes):
+    # type: (bytes) -> ET.Element
+    return ET.fromstring(xml_bytes)
diff --git a/setup.py b/setup.py
index 004158a..212c16a 100644
--- a/setup.py
+++ b/setup.py
@@ -1,13 +1,13 @@
 import glob
+# pylint: disable=no-name-in-module,import-error
 from distutils.core import setup
-
 # get all of the scripts
 scripts = glob.glob('bin/*')
 
 setup(
   name='docx2txt',
   packages=['docx2txt'],
-  version='0.7',
+  version='0.8',
   description='A pure python-based utility to extract text and images '
               'from docx files.',
   author='Ankush Shah',