Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions PARADOXES.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
To make UNIQUE constraints work in general, there must be a space around each comma, for example:

create table testimus1 (ena str unique , dio str)

create table testimus1 (ena str unique , dio str primary key , tria str )

create index indo on testimus1(ena) using btree (MONO B TREE SUPPORTED)
Binary file added __pycache__/mdb.cpython-310.pyc
Binary file not shown.
Binary file added changelog.pdf
Binary file not shown.
Binary file added dbdata/smdb_db/advisor.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/boy.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/boy2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/classroom.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/course.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/department.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/instructor.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_indexes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_insert_stack.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_length.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/meta_locks.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/prereq.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/section.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/student.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/takes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/teaches.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes3.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes4.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes6.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes7.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/tes9.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test1.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test2.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test4.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test5.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test8.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/test9.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/time_slot.pkl
Binary file not shown.
Binary file added dbdata/smdb_db/uniqoz1.pkl
Binary file not shown.
Binary file added index_uniques.pkl
Binary file not shown.
261 changes: 241 additions & 20 deletions mdb.py

Large diffs are not rendered by default.

Binary file added meta_index_type.pkl
Binary file not shown.
Binary file added miniDB/__pycache__/__init__.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/btree.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/database.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/joins.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/misc.cpython-310.pyc
Binary file not shown.
Binary file added miniDB/__pycache__/table.cpython-310.pyc
Binary file not shown.
2 changes: 2 additions & 0 deletions miniDB/btree.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ class Node:
Node abstraction. Represents a single bucket
'''
def __init__(self, b, values=None, ptrs=None,left_sibling=None, right_sibling=None, parent=None, is_leaf=False):


self.b = b # branching factor
self.values = [] if values is None else values # Values (the data from the pk column)
self.ptrs = [] if ptrs is None else ptrs # ptrs (the indexes of each datapoint or the index of another bucket)
Expand Down
828 changes: 766 additions & 62 deletions miniDB/database.py

Large diffs are not rendered by default.

71 changes: 67 additions & 4 deletions miniDB/misc.py
Original file line number Diff line number Diff line change
@@ -1,40 +1,101 @@
import operator

def between(value,range):
    '''
    Implements the BETWEEN functionality: True when value lies inside
    range, limits included. The two limits may be given in either order
    (BETWEEN 5 & 10 == BETWEEN 10 & 5).

    Args:
        value: the specific value stored in the table we are comparing.
        range: string of accepted values from the BETWEEN keyword, of the
            form "low & high" (must contain the '&' split character).

    Raises:
        IndexError: if the range string does not contain '&'.
    '''
    split_key='&' # exp: BETWEEN 5 AND 25;
    if(split_key not in range):
        raise IndexError('Between syntax: BETWEEN "value1 & value2".')
    limits = range.split(split_key)
    try: # numeric comparison: both limits AND the value must parse as floats
        limits = [float(x) for x in limits]
        value = float(value)
    except (ValueError, TypeError):
        # fall back to plain string comparison; re-split so the limits are
        # strings again even when they parsed as floats but value did not
        # (the old code crashed with AttributeError in that case)
        limits = range.split(split_key)
    return (limits[0] <= value <= limits[1]) or (limits[1] <= value <= limits[0])

def not_between(value,range):
    '''
    Reverse of between: True when value lies outside range, limits
    excluded (standard SQL NOT BETWEEN semantics).

    Args:
        value: the specific value stored in the table we are comparing.
        range: string of the form "low & high" (must contain '&').

    Raises:
        IndexError: if the range string does not contain '&'.
    '''
    split_key='&' # exp: NOT BETWEEN 5 AND 25;
    if(split_key not in range):
        raise IndexError('Between syntax: BETWEEN "value1 & value2".')
    limits = range.split(split_key)
    try: # numeric comparison: both limits AND the value must parse as floats
        limits = [float(x) for x in limits]
        value = float(value)
    except (ValueError, TypeError):
        # fall back to plain string comparison; re-split so the limits are
        # strings again even when they parsed as floats but value did not
        limits = range.split(split_key)
    # logical complement of between, either limit order accepted
    return not ((limits[0] <= value <= limits[1]) or (limits[1] <= value <= limits[0]))

def reverse_operator(op):
    '''
    Return the inverted operator used when a condition is prefixed with NOT.
    Handles the comparison operators, = <-> != and between -> not_between.
    Returns None for an unknown operator.
    '''
    # 'not_between' deliberately has no entry: NOT NOT BETWEEN is not used.
    inverted = {
        '>': '<=',
        '>=': '<',
        '<': '>=',
        '<=': '>',
        '!=': '=',
        '=': '!=',
        'between': 'not_between',
    }
    return inverted.get(op)

def get_op(op, a, b):
    '''
    Apply the operator named by the symbol `op` to a and b.

    Args:
        op: string. One of >, <, !=, >=, <=, =, between, not_between.
        a: left operand (table value).
        b: right operand (condition value; the "low & high" string for
            between / not_between).

    Returns:
        bool. False when the comparison raises TypeError (e.g. a deleted
        record stores None).
    '''
    # NOTE: the diff residue duplicated the '=' entry ('=': operator.eq}
    # followed by '=': operator.eq,); only one entry belongs here.
    ops = {'>': operator.gt,
           '<': operator.lt,
           '!=': operator.ne,
           '>=': operator.ge,
           '<=': operator.le,
           '=': operator.eq,
           'between': between, # matching between keyword with def between(value,range)
           'not_between' : not_between} # matching not_between keyword with def not_between(value,range)

    try:
        return ops[op](a,b)
    except TypeError: # if a or b is None (deleted record), python3 raises TypeError
        return False

def split_condition(condition):
    '''
    Split a single condition string into (column, operator, value).

    Supported operators: >=, <=, !=, =, >, <, between. A "not " keyword
    anywhere in the condition inverts the matched operator (e.g.
    "x not between 1&5" -> ('x', 'not_between', '1&5')).

    Returns:
        (left, op_key, right) tuple, or None when no operator is found.

    Raises:
        ValueError: if the value contains unquoted whitespace or embedded
            double quotes.
    '''
    # Multi-character operators come first so '>=' is not split on '>'.
    # Only the operator symbols are needed here (the old dict mapped them
    # to functions that were never used).
    op_keys = ['>=', '<=', '!=', '=', '>', '<', 'between']

    # Detect and strip a NOT keyword once, before splitting. Matching
    # "not " (with the trailing space) avoids false positives on column
    # names that merely contain "not", e.g. "notes".
    is_not = 'not ' in condition
    if is_not:
        condition = condition.replace('not ', '')

    for op_key in op_keys:
        splt = condition.split(op_key)
        if len(splt) > 1:
            left, right = splt[0].strip(), splt[1].strip()

            if right[0] == '"' == right[-1]: # If the value has leading and trailing quotes, remove them.
                right = right.strip('"')
            elif ' ' in right: # If it has whitespaces but no leading and trailing double quotes, throw.
                raise ValueError(f'Invalid condition: {condition}\nValue must be enclosed in double quotation marks to include whitespaces.')

            if right.find('"') != -1: # If there are any double quotes in the value, throw. (Notice we've already removed the leading and trailing ones)
                raise ValueError(f'Invalid condition: {condition}\nDouble quotation marks are not allowed inside values.')

            if is_not: # NOT detected: reverse the operator logic
                op_key = reverse_operator(op_key)
            return left, op_key, right

def reverse_op(op):
Expand All @@ -46,5 +107,7 @@ def reverse_op(op):
'>=' : '<=',
'<' : '>',
'<=' : '>=',
'!=' : '!=',
'=' : '='
}.get(op)

150 changes: 138 additions & 12 deletions miniDB/table.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from __future__ import annotations
from tabulate import tabulate
import pandas as panda
from tabulate import tabulate # prints tables in a nice format
import pickle
import os
import sys
Expand Down Expand Up @@ -27,6 +28,7 @@ class Table:

'''
def __init__(self, name=None, column_names=None, column_types=None, primary_key=None, load=None):
#print("table.py __init__ func RUN")

if load is not None:
# if load is a dict, replace the object dict with it (replaces the object with the specified one)
Expand Down Expand Up @@ -67,16 +69,30 @@ def __init__(self, name=None, column_names=None, column_types=None, primary_key=
else:
self.pk_idx = None

#givinga value to anew variablee named unique index in order to use it when creating and showing the table later
if os.path.isfile('./unique_table.pkl'):
dataFr=panda.read_pickle('./unique_table.pkl')
searcher=(dataFr['tab_name']==name)
res=dataFr[searcher]
if res.empty:
self.unique_idx=None
else:
unique_boy=res.iloc[0]['unique_column']
self.unique_idx=self.column_names.index(unique_boy)


self.pk = primary_key
# self._update()

# if any of the name, columns_names and column types are none. return an empty table object

def column_by_name(self, column_name):
#print("table.py column_by_name func RUN")
return [row[self.column_names.index(column_name)] for row in self.data]


def _update(self):
#print("_update RUN")
'''
Update all the available columns with the appended rows.
'''
Expand All @@ -85,6 +101,7 @@ def _update(self):
setattr(self, col, self.columns[ind])

def _cast_column(self, column_name, cast_type):
#print("_casr_column func RUN")
'''
Cast all values of a column using a specified type.

Expand All @@ -103,13 +120,20 @@ def _cast_column(self, column_name, cast_type):


def _insert(self, row, insert_stack=[]):
#print("_insert func RUN")
'''
Insert row to table.

Args:
row: list. A list of values to be inserted (will be casted to a predifined type automatically).
insert_stack: list. The insert stack (empty by default).
'''

table_n=row[0].strip("'")



#print(self.unique_idx)
if len(row)!=len(self.column_names):
raise ValueError(f'ERROR -> Cannot insert {len(row)} values. Only {len(self.column_names)} columns exist')

Expand Down Expand Up @@ -138,6 +162,7 @@ def _insert(self, row, insert_stack=[]):
# self._update()

def _update_rows(self, set_value, set_column, condition):
#print("_update_rows func RUN")
'''
Update where Condition is met.

Expand Down Expand Up @@ -223,10 +248,11 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
desc: boolean. If True, order_by will return results in descending order (False by default).
limit: int. An integer that defines the number of rows that will be returned (all rows if None).
'''

#print("_select_where func RUN")
# if * return all columns, else find the column indexes for the columns specified
if return_columns == '*':
return_cols = [i for i in range(len(self.column_names))]
#print (return_cols)
else:
return_cols = [self.column_names.index(col.strip()) for col in return_columns.split(',')]

Expand Down Expand Up @@ -270,20 +296,94 @@ def _select_where(self, return_columns, condition=None, distinct=False, order_by
return s_table


def _select_where_with_hashindex(self, return_columns, index, condition, distinct=False, order_by=None, desc=True, limit=None):
    '''
    Select rows using a hash index present on the condition column.

    Args:
        return_columns: list or '*'. The columns to be returned.
        index: the hash index object; assumed to support find(value) for
            equality lookups and get_all() yielding (key, row_ptr) pairs
            — TODO confirm against the index implementation.
        condition: string using the format
            'column[<,<=,=,>=,>]value' or 'value[<,<=,=,>=,>]column'.
        distinct: boolean. If True, the resulting table will contain only unique rows (False by default).
        order_by: string. A column name that signals that the resulting table should be ordered based on it (no order if None).
        desc: boolean. If True, order_by will return results in descending order (False by default).
        limit: int. An integer that defines the number of rows that will be returned (all rows if None).
    '''
    # if * return all columns, else find the column indexes for the columns specified
    if return_columns == '*':
        return_cols = [i for i in range(len(self.column_names))]
    else:
        return_cols = [self.column_names.index(colname) for colname in return_columns]

    column_name, operator, value = self._parse_condition(condition)

    # collect the matching row indexes from the hash index
    rows = []
    if operator == '=':
        # a hash index answers equality lookups directly
        result = index.find(value)
        if result is not None:
            rows.append(result)
    else:
        # any other operator: scan every (key, row_ptr) pair in the index
        # and compare the stored key against the CONDITION value.
        # (The old loop reused the name `value` for the row pointer, so it
        # compared each key against its own pointer instead.)
        for key, row_ptr in index.get_all():
            if get_op(operator, key, value):
                rows.append(row_ptr)

    try:
        k = int(limit)
    except TypeError:
        k = None
    # same as simple select from now on
    rows = rows[:k]
    # TODO: this needs to be dumbed down
    dict = {(key):([[self.data[i][j] for j in return_cols] for i in rows] if key=="data" else val) for key,val in self.__dict__.items()}

    dict['column_names'] = [self.column_names[i] for i in return_cols]
    dict['column_types'] = [self.column_types[i] for i in return_cols]

    s_table = Table(load=dict)

    s_table.data = list(set(map(lambda x: tuple(x), s_table.data))) if distinct else s_table.data

    if order_by:
        s_table.order_by(order_by, desc)

    if isinstance(limit,str):
        s_table.data = [row for row in s_table.data if row is not None][:int(limit)]

    return s_table

def _select_where_with_btree(self, return_columns, bt, condition, distinct=False, order_by=None, desc=True, limit=None):
'''
The same function as created but with some support when i use index on a unique column
'''

#print("I RUN!? BTREE_SELECT")
# if * return all columns, else find the column indexes for the columns specified
if return_columns == '*':
return_cols = [i for i in range(len(self.column_names))]
else:

return_cols = [self.column_names.index(colname) for colname in return_columns]
print("return columns"+ return_cols)


column_name, operator, value = self._parse_condition(condition)

# if the column in condition is not a primary key, abort the select
if column_name != self.column_names[self.pk_idx]:
print('Column is not PK. Aborting')


# here we run the same select twice, sequentially and using the btree.
# we then check the results match and compare performance (number of operation)
Expand Down Expand Up @@ -513,7 +613,8 @@ def _full_join(self, table_right: Table, condition):

return join_table

def show(self, no_of_rows=None, is_locked=False):
def show(self, no_of_rows=None, is_locked=False,print_output=True):
#print("table.py show func RUN")
'''
Print the table in a nice readable format.

Expand All @@ -533,14 +634,35 @@ def show(self, no_of_rows=None, is_locked=False):
if self.pk_idx is not None:
# table has a primary key, add PK next to the appropriate column
headers[self.pk_idx] = headers[self.pk_idx]+' #PK#'
# detect the rows that are no tfull of nones (these rows have been deleted)
# if we dont skip these rows, the returning table has empty rows at the deleted positions

#here i wil ldetect when printing table and and aa #uniques tag
if os.path.isfile('./unique_table.pkl'):
dataFr=panda.read_pickle('./unique_table.pkl')
searcher=(dataFr['tab_name']==self._name)
res=dataFr[searcher]
if res.empty:
print('')

else:
unique_boy1=res.iloc[0]['unique_column']

print(unique_boy1)
if self.unique_idx is not None:
headers[self.unique_idx] = headers[self.unique_idx]+' #UNIQUE#'

non_none_rows = [row for row in self.data if any(row)]

### x
# print using tabulate
print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
#print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
if(print_output):
print(tabulate(non_none_rows[:no_of_rows], headers=headers)+'\n')
return headers,non_none_rows
###


def _parse_condition(self, condition, join=False):
#print("[table.py] (_parse_condition) condition:",condition," this is where where happens")
'''
Parse the single string condition and return the value of the column and the operator.

Expand All @@ -561,11 +683,15 @@ def _parse_condition(self, condition, join=False):
if left not in self.column_names:
raise ValueError(f'Condition is not valid (cant find column name)')
coltype = self.column_types[self.column_names.index(left)]

return left, op, coltype(right)
if(op=='between' or op=='not_between'):
#print("[table.py] (_parse_condition) between detected")
return left, op, str(right) #between condition is always a string, type is handled internaly with between function (misc.py).
else:
return left,op,coltype(right)


def _load_from_file(self, filename):
#print("table.py i just _load_from_file")
'''
Load table from a pkl file (not used currently).

Expand Down
Loading