Skip to content

DatabaseManager() Class (scrapeNews.db)

Ayush Agarwal edited this page Dec 20, 2017 · 2 revisions

The DatabaseManager() class provides utilities for connecting to the database and performing common operations on it. It also contains predefined strings for inserting data into the database.

Methods

Constructor

Method instantiates the conn and cursor object of the class.

def __init__(self, connection=None, endOnDel=True):
    """Set up the connection and cursor used by this manager.

    Args:
        connection: An existing object exposing ``conn`` and ``cursor``
            attributes to share, or None to open a new connection through
            ``self.connect()``.
        endOnDel: Flag stored on the instance as ``self.endOnDel``. It is
            not used any further inside this method.
    """
    # Record the flag on every path. It used to be assigned only when an
    # existing connection was supplied, which left the attribute undefined
    # on instances that opened their own connection.
    self.endOnDel = endOnDel
    if connection is None:
        self.connect()
    else:
        # Share the caller's connection and cursor instead of opening new ones.
        self.conn = connection.conn
        self.cursor = connection.cursor

Defaults:

  • connection: None
  • endOnDel: True

Creates a new connection if connection is None, uses existing connection provided by connection otherwise.

DatabaseManager.connect()

Connects to the database and initializes it using the DatabaseManager.initialize() method (note that the code below spells this method `initilaize`).

Returns: cursor object on success, None if the connection attempt fails.

def connect(self):
    """Return a usable cursor, opening the database connection if needed.

    An existing connection is reused when its ``closed`` attribute is 0.
    Otherwise a new psycopg2 connection is opened from
    ``self.connect_str`` with autocommit enabled, a ``DictCursor`` is
    created, and ``self.initilaize()`` is invoked.

    Returns:
        The cursor stored on ``self.cursor``, or None when any step of the
        connection attempt raises (the error is logged as critical).
    """
    current = self.conn
    if current is not None and current.closed == 0:
        # Connection is still open: hand back the cursor we already have.
        return self.cursor

    try:
        self.conn = psycopg2.connect(self.connect_str)
        self.conn.autocommit = True
        self.cursor = self.conn.cursor(
            cursor_factory=psycopg2.extras.DictCursor
        )
        logger.debug(__name__+" Connected to Database")
        # initilaize() calls connect() again; by now the connection is open,
        # so that nested call takes the early-return path above.
        self.initilaize()
        return self.cursor
    except Exception as e:
        logger.critical(__name__+" Database Connection Error!"+str(e))
        return None

DatabaseManager.initialize()

Creates the tables in the database if they do not already exist.

Returns: Bool. True on successful initialization, False otherwise.

def initilaize(self):
    """Run the table-setup statements against the connected database.

    Executes, in order, the statements held in ``self.site_table``,
    ``self.logs_table`` and ``self.item_table`` (presumably CREATE TABLE
    statements; their text is not visible here) and commits.

    The misspelled method name is kept on purpose: ``connect()`` calls it
    under this name.

    Returns:
        bool: True when every statement ran and the commit succeeded;
        False when no connection is available or any step raised, in which
        case the transaction is rolled back and the error is logged.
    """
    try:
        if self.connect() is None:
            return False
        # Create Tables
        self.cursor.execute(self.site_table)
        self.cursor.execute(self.logs_table)
        self.cursor.execute(self.item_table)
        # Commit
        self.conn.commit()
        return True
    # This clause must align with the ``try`` above; it was previously
    # indented one column too far, which is an IndentationError.
    except Exception as e:
        self.conn.rollback()
        logger.error(__name__+" Database Initilaization Error "+str(e))
        return False

DatabaseManager.getSiteId(site_name)

Method fetches site_id from Database using site_name from site_table

Returns: site_id on Success, False otherwise.

def getSiteId(self, site_name):
    """Look up the id of a site by its name.

    Args:
        site_name: Value matched against the ``site_name`` column of the
            table named by ``self.site_table_name``.

    Returns:
        The ``id`` of the first matching row, or False when no connection
        is available, no row matches, or the query raises.
    """
    try:
        if self.connect() is None:
            return False
        # Lazy %-formatting: a non-str site_name can no longer break logging.
        logger.debug("%s Searching Database for %s", __name__, site_name)
        sql = "SELECT id FROM "+self.site_table_name+" WHERE site_name = %s LIMIT 1;"
        cur = self.cursor
        cur.execute(sql, (site_name,))
        # fetchone() returns None when the result set is empty, so no
        # separate rowcount check is needed.
        row = cur.fetchone()
        if row is None:
            return False
        site_id = row['id']
        logger.debug("Found %s at id: %s", site_name, site_id)
        return site_id
    except Exception as e:
        # Honour the documented "False otherwise" contract on database
        # errors, matching the error handling of the other query methods.
        logger.error(__name__+" DATABASE ERROR: "+str(e))
        return False

DatabaseManager.getUrlsScraped(site_id)

Method Returns a List of All Urls for given site_id

Returns: List Of Urls

def getUrlsScraped(self, site_id):
    """Return every stored link for the given site.

    Args:
        site_id: Value matched against the ``site`` column of the table
            named by ``self.item_table_name``. A value equal to False
            short-circuits to an empty list.

    Returns:
        A list of the ``link`` values of the matching rows; an empty list
        when ``site_id`` equals False or the query raises; False when no
        connection is available.
    """
    # ``== False`` is kept deliberately (rather than ``not site_id``) so
    # that the set of inputs treated as "no site" is unchanged.
    if site_id == False:
        return []
    try:
        if self.connect() is None:
            return False
        sql = "SELECT link FROM "+self.item_table_name+" WHERE site= %s"
        cur = self.cursor
        cur.execute(sql, (site_id,))
        # The query selects the ``link`` column, so rows must be read by
        # that key. Reading 'url' raised KeyError on every row, which was
        # swallowed below and made the method always return [].
        return [row['link'] for row in cur.fetchall()]
    except Exception as e:
        logger.error(__name__+" DATABASE ERROR: "+str(e))
    return []

DatabaseManager.urlExists(url)

Checks whether the given url exists in the database.

Returns: Bool. True if the url is found; False if it is not found or on failure.

def urlExists(self, url):
    """Report whether a link is already stored in the item table.

    Args:
        url: Value matched against the ``link`` column of the table named
            by ``self.item_table_name``.

    Returns:
        bool: True when the query matches at least one row; False when it
        matches none, when no connection is available, or when the query
        raises (the error is logged).
    """
    try:
        if self.connect() is None:
            return False
        query = "SELECT link FROM "+self.item_table_name+" WHERE link= %s"
        lookup = self.cursor
        lookup.execute(query, (url,))
        # rowcount holds the number of rows the SELECT produced.
        return lookup.rowcount > 0
    except Exception as e:
        logger.error(__name__+" DATABASE ERROR: "+str(e))
    return False