Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions PARADOXES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
To make UNIQUE constraints work in general, there must be a space around each comma, for example:

create table testimus1 (ena str unique , dio str)

create table testimus1 (ena str unique , dio str primary key , tria str )

create index indo on testimus1(ena) using btree (MONO B TREE SUPPORTED)
Binary file added __pycache__/mdb.cpython-310.pyc
Binary file not shown.
Binary file added changelog.pdf
Binary file not shown.
Binary file added dbdata/smdb_db/advisor.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/boy.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/boy2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/classroom.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/course.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/department.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/instructor.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_indexes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_insert_stack.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_length.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_locks.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/prereq.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/section.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/student.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/takes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/teaches.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes3.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes4.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes6.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes7.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes9.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test1.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test4.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test5.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test8.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test9.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/time_slot.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/uniqoz1.pkl
Binary file not shown.
Binary file added index_uniques.pkl
Binary file not shown.
261 changes: 241 additions & 20 deletions mdb.py

Large diffs are not rendered by default.

Binary file added meta_index_type.pkl
Binary file not shown.
Binary file added miniDB/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/btree.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/database.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/joins.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/misc.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/table.cpython-310.pyc
Binary file not shown.
2 changes: 2 additions & 0 deletions miniDB/btree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class Node:
Node abstraction. Represents a single bucket
'''
def __init__(self, b, values=None, ptrs=None,left_sibling=None, right_sibling=None, parent=None, is_leaf=False):


self.b = b # branching factor
self.values = [] if values is None else values # Values (the data from the pk column)
self.ptrs = [] if ptrs is None else ptrs # ptrs (the indexes of each datapoint or the index of another bucket)
Expand Down
828 changes: 766 additions & 62 deletions miniDB/database.py

Large diffs are not rendered by default.

71 changes: 67 additions & 4 deletions miniDB/misc.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,101 @@
import operator

def between(value,range):
    '''
    Implements the BETWEEN functionality: True when value lies inside
    range, limits included. The two limits may be given in either order
    (BETWEEN 5 & 10 == BETWEEN 10 & 5).

    Args:
        value: the specific value stored in the table we are comparing.
        range: string of accepted values from the BETWEEN keyword, of the
            form "low & high" (must contain the '&' split character).

    Raises:
        IndexError: if the range string does not contain '&'.
    '''
    split_key='&' # exp: BETWEEN 5 AND 25;
    if(split_key not in range):
        raise IndexError('Between syntax: BETWEEN "value1 & value2".')
    limits = range.split(split_key)
    try: # numeric comparison: both limits AND the value must parse as floats
        limits = [float(x) for x in limits]
        value = float(value)
    except (ValueError, TypeError):
        # fall back to plain string comparison; re-split so the limits are
        # strings again even when they parsed as floats but value did not
        # (the old code crashed with AttributeError in that case)
        limits = range.split(split_key)
    return (limits[0] <= value <= limits[1]) or (limits[1] <= value <= limits[0])

def not_between(value,range):
    '''
    Reverse of between: True when value lies outside range, limits
    excluded (standard SQL NOT BETWEEN semantics).

    Args:
        value: the specific value stored in the table we are comparing.
        range: string of the form "low & high" (must contain '&').

    Raises:
        IndexError: if the range string does not contain '&'.
    '''
    split_key='&' # exp: NOT BETWEEN 5 AND 25;
    if(split_key not in range):
        raise IndexError('Between syntax: BETWEEN "value1 & value2".')
    limits = range.split(split_key)
    try: # numeric comparison: both limits AND the value must parse as floats
        limits = [float(x) for x in limits]
        value = float(value)
    except (ValueError, TypeError):
        # fall back to plain string comparison; re-split so the limits are
        # strings again even when they parsed as floats but value did not
        limits = range.split(split_key)
    # logical complement of between, either limit order accepted
    return not ((limits[0] <= value <= limits[1]) or (limits[1] <= value <= limits[0]))

def reverse_operator(op):
    '''
    Return the inverted operator used when a condition is prefixed with NOT.
    Handles the comparison operators, = <-> != and between -> not_between.
    Returns None for an unknown operator.
    '''
    # 'not_between' deliberately has no entry: NOT NOT BETWEEN is not used.
    inverted = {
        '>': '<=',
        '>=': '<',
        '<': '>=',
        '<=': '>',
        '!=': '=',
        '=': '!=',
        'between': 'not_between',
    }
    return inverted.get(op)

def get_op(op, a, b):
    '''
    Apply the operator named by the symbol `op` to a and b.

    Args:
        op: string. One of >, <, !=, >=, <=, =, between, not_between.
        a: left operand (table value).
        b: right operand (condition value; the "low & high" string for
            between / not_between).

    Returns:
        bool. False when the comparison raises TypeError (e.g. a deleted
        record stores None).
    '''
    # NOTE: the diff residue duplicated the '=' entry ('=': operator.eq}
    # followed by '=': operator.eq,); only one entry belongs here.
    ops = {'>': operator.gt,
           '<': operator.lt,
           '!=': operator.ne,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq,
           'between': between, # matching between keyword with def between(value,range)
           'not_between' : not_between} # matching not_between keyword with def not_between(value,range)

    try:
        return ops[op](a,b)
    except TypeError: # if a or b is None (deleted record), python3 raises TypeError
        return False

def split_condition(condition):
    '''
    Split a single condition string into (column, operator, value).

    Supported operators: >=, <=, !=, =, >, <, between. A "not " keyword
    anywhere in the condition inverts the matched operator (e.g.
    "x not between 1&5" -> ('x', 'not_between', '1&5')).

    Returns:
        (left, op_key, right) tuple, or None when no operator is found.

    Raises:
        ValueError: if the value contains unquoted whitespace or embedded
            double quotes.
    '''
    # Multi-character operators come first so '>=' is not split on '>'.
    # Only the operator symbols are needed here (the old dict mapped them
    # to functions that were never used).
    op_keys = ['>=', '<=', '!=', '=', '>', '<', 'between']

    # Detect and strip a NOT keyword once, before splitting. Matching
    # "not " (with the trailing space) avoids false positives on column
    # names that merely contain "not", e.g. "notes".
    is_not = 'not ' in condition
    if is_not:
        condition = condition.replace('not ', '')

    for op_key in op_keys:
        splt = condition.split(op_key)
        if len(splt) > 1:
            left, right = splt[0].strip(), splt[1].strip()

            if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
                right = right.strip('"')
            elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
                raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')

            if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
                raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')

            if is_not: # NOT detected: reverse the operator logic
                op_key = reverse_operator(op_key)
            return left, op_key, right

def reverse_op(op):
Expand All @@ -46,5 +107,7 @@ def reverse_op(op):
'>=' : '<=',
'<' : '>',
'<=' : '>=',
'!=' : '!=',
'=' : '='
}.get(op)

150 changes: 138 additions & 12 deletions miniDB/table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
from tabulate import tabulate
import pandas as panda
from tabulate import tabulate # prints tables in a nice format
import pickle
import os
import sys
Expand Down Expand Up @@ -27,6 +28,7 @@ class Table:

'''
def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
#print("table.py __init__ func RUN")

if load is not None:
# if load is a dict, replace the object dict with it (replaces the object with the specified one)
Expand Down Expand Up @@ -67,16 +69,30 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
else:
self.pk_idx = None

#givinga value to anew variablee named unique index in order to use it when creating and showing the table later
if os.path.isfile('./unique_table.pkl'):
dataFr=panda.read_pickle('./unique_table.pkl')
searcher=(dataFr['tab_name']==name)
res=dataFr[searcher]
if res.empty:
self.unique_idx=None
else:
unique_boy=res.iloc[0]['unique_column']
self.unique_idx=self.column_names.index(unique_boy)


self.pk = primary_key
# self._update()

# if any of the name, columns_names and column types are none. return an empty table object

def column_by_name(self, column_name):
#print("table.py column_by_name func RUN")
return [row[self.column_names.index(column_name)] for row in self.data]


def _update(self):
#print("_update RUN")
'''
Update all the available columns with the appended rows.
'''
Expand All @@ -85,6 +101,7 @@ def _update(self):
setattr(self, col, self.columns[ind])

def _cast_column(self, column_name, cast_type):
#print("_casr_column func RUN")
'''
Cast all values of a column using a specified type.

Expand All @@ -103,13 +120,20 @@ def _cast_column(self, column_name, cast_type):


def _insert(self, row, insert_stack=[]):
#print("_insert func RUN")
'''
Insert row to table.

Args:
row: list. A list of values to be inserted (will be casted to a predifined type automatically).
insert_stack: list. The insert stack (empty by default).
'''

table_n=row[0].strip("'")



#print(self.unique_idx)
if len(row)!=len(self.column_names):
raise ValueError(f'ERROR -> Cannot insert {len(row)} values. Only {len(self.column_names)} columns exist')

Expand Down Expand Up @@ -138,6 +162,7 @@ def _insert(self, row, insert_stack=[]):
# self._update()

def _update_rows(self, set_value, set_column, condition):
#print("_update_rows func RUN")
'''
Update where Condition is met.

Expand Down Expand Up @@ -223,10 +248,11 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
desc: boolean. If True, order_by will return results in descending order (False by default).
limit: int. An integer that defines the number of rows that will be returned (all rows if None).
'''

#print("_select_where func RUN")
# if * return all columns, else find the column indexes for the columns specified
if return_columns == '*':
return_cols = [i for i in range(len(self.column_names))]
#print (return_cols)
else:
return_cols = [self.column_names.index(col.strip()) for col in return_columns.split(',')]

Expand Down Expand Up @@ -270,20 +296,94 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
return s_table


def _select_where_with_hashindex(self, return_columns, index, condition, distinct=False, order_by=None, desc=True, limit=None):
    '''
    Select rows using a hash index present on the condition column.

    Args:
        return_columns: list or '*'. The columns to be returned.
        index: the hash index object; assumed to support find(value) for
            equality lookups and get_all() yielding (key, row_ptr) pairs
            — TODO confirm against the index implementation.
        condition: string using the format
            'column[<,<=,=,>=,>]value' or 'value[<,<=,=,>=,>]column'.
        distinct: boolean. If True, the resulting table will contain only unique rows (False by default).
        order_by: string. A column name that signals that the resulting table should be ordered based on it (no order if None).
        desc: boolean. If True, order_by will return results in descending order (False by default).
        limit: int. An integer that defines the number of rows that will be returned (all rows if None).
    '''
    # if * return all columns, else find the column indexes for the columns specified
    if return_columns == '*':
        return_cols = [i for i in range(len(self.column_names))]
    else:
        return_cols = [self.column_names.index(colname) for colname in return_columns]

    column_name, operator, value = self._parse_condition(condition)

    # collect the matching row indexes from the hash index
    rows = []
    if operator == '=':
        # a hash index answers equality lookups directly
        result = index.find(value)
        if result is not None:
            rows.append(result)
    else:
        # any other operator: scan every (key, row_ptr) pair in the index
        # and compare the stored key against the CONDITION value.
        # (The old loop reused the name `value` for the row pointer, so it
        # compared each key against its own pointer instead.)
        for key, row_ptr in index.get_all():
            if get_op(operator, key, value):
                rows.append(row_ptr)

    try:
        k = int(limit)
    except TypeError:
        k = None
    # same as simple select from now on
    rows = rows[:k]
    # TODO: this needs to be dumbed down
    dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else val) for key,val in self.__dict__.items()}

    dict['column_names'] = [self.column_names[i] for i in return_cols]
    dict['column_types'] = [self.column_types[i] for i in return_cols]

    s_table = Table(load=dict)

    s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data

    if order_by:
        s_table.order_by(order_by, desc)

    if isinstance(limit,str):
        s_table.data = [row for row in s_table.data if row is not None][:int(limit)]

    return s_table

def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None):
'''
The same function as created but with some support when i use index on a unique column
'''

#print("I RUN!? BTREE_SELECT")
# if * return all columns, else find the column indexes for the columns specified
if return_columns == '*':
return_cols = [i for i in range(len(self.column_names))]
else:

return_cols = [self.column_names.index(colname) for colname in return_columns]
print("return columns"+ return_cols)


column_name, operator, value = self._parse_condition(condition)

# if the column in condition is not a primary key, abort the select
if column_name != self.column_names[self.pk_idx]:
print('Column is not PK. Aborting')


# here we run the same select twice, sequentially and using the btree.
# we then check the results match and compare performance (number of operation)
Expand Down Expand Up @@ -513,7 +613,8 @@ def _full_join(self, table_right: Table, condition):

return join_table

def show(self, no_of_rows=None, is_locked=False):
def show(self, no_of_rows=None, is_locked=False,print_output=True):
#print("table.py show func RUN")
'''
Print the table in a nice readable format.

Expand All @@ -533,14 +634,35 @@ def show(self, no_of_rows=None, is_locked=False):
if self.pk_idx is not None:
# table has a primary key, add PK next to the appropriate column
headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
# detect the rows that are no tfull of nones (these rows have been deleted)
# if we dont skip these rows, the returning table has empty rows at the deleted positions

#here i wil ldetect when printing table and and aa #uniques tag
if os.path.isfile('./unique_table.pkl'):
dataFr=panda.read_pickle('./unique_table.pkl')
searcher=(dataFr['tab_name']==self._name)
res=dataFr[searcher]
if res.empty:
print('')

else:
unique_boy1=res.iloc[0]['unique_column']

print(unique_boy1)
if self.unique_idx is not None:
headers[self.unique_idx] = headers[self.unique_idx]+' #UNIQUE#'

non_none_rows = [row for row in self.data if any(row)]

### x
# print using tabulate
print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
#print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
if(print_output):
print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
return headers,non_none_rows
###


def _parse_condition(self, condition, join=False):
#print("[table.py] (_parse_condition) condition:",condition," this is where where happens")
'''
Parse the single string condition and return the value of the column and the operator.

Expand All @@ -561,11 +683,15 @@ def _parse_condition(self, condition, join=False):
if left not in self.column_names:
raise ValueError(f'Condition is not valid (cant find column name)')
coltype = self.column_types[self.column_names.index(left)]

return left, op, coltype(right)
if(op=='between' or op=='not_between'):
#print("[table.py] (_parse_condition) between detected")
return left, op, str(right) #between condition is always a string, type is handled internaly with between function (misc.py).
else:
return left,op,coltype(right)


def _load_from_file(self, filename):
#print("table.py i just _load_from_file")
'''
Load table from a pkl file (not used currently).

Expand Down
Loading