From d992601d870fe05b5bde0d86b113be44ccbfa71f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Attila=20Sz=C3=A1sz?= Date: Wed, 20 Sep 2017 12:23:19 +0100 Subject: [PATCH] Features: 1. Expose page numbers 2. Expose highlight colours 3. Extract and expose annotation colours --- lib/exportannotation.py | 12 ++++++++++-- lib/extracthl.py | 6 ++++-- lib/extracthl2.py | 17 +++++++++++++++-- lib/extractnt.py | 3 +++ lib/extracttags.py | 2 +- lib/tools.py | 11 ++++++++++- menotexport.py | 16 +++++++++++----- 7 files changed, 54 insertions(+), 13 deletions(-) diff --git a/lib/exportannotation.py b/lib/exportannotation.py index e54b6d2..f82f006 100644 --- a/lib/exportannotation.py +++ b/lib/exportannotation.py @@ -44,6 +44,8 @@ def _exportAnnoFile(abpath_out,anno,verbose=True): - @citationkey - Tags: @tag1, @tag2, @tag3... - Ctime: creation time + - Page: page number (ordinal) + - Color: highlight color ----------------------------------------------------- # Title of another PDF @@ -56,6 +58,8 @@ def _exportAnnoFile(abpath_out,anno,verbose=True): - @citationkey - Tags: @tag1, @tag2, @tag3... - Ctime: creation time + - Page: page number (ordinal) + - Color: highlight color Use tabs in indention, and markup syntax: ">" for highlights, and "-" for notes. 
@@ -105,8 +109,10 @@ def _exportAnnoFile(abpath_out,anno,verbose=True): \t\t- @{1} \t\t- Tags: {2} \t\t- Ctime: {3} +\t\t- Page: {4} +\t\t- Color: {5} '''.format(*map(conv,[hlstr, hljj.citationkey,\ - tagstr, hljj.ctime])) + tagstr, hljj.ctime, hljj.page, hljj.color])) #outstr=outstr.encode('ascii','replace') outstr=outstr.encode('utf8','replace') @@ -126,8 +132,10 @@ def _exportAnnoFile(abpath_out,anno,verbose=True): \t\t- @{1} \t\t- Tags: {2} \t\t- Ctime: {3} +\t\t- Page: {4} +\t\t- Color: {5} '''.format(*map(conv,[ntstr, ntjj.citationkey,\ - tagstr, ntjj.ctime])) + tagstr, ntjj.ctime, ntjj.page, ntjj.color])) #outstr=outstr.encode('ascii','replace') outstr=outstr.encode('utf8','replace') diff --git a/lib/extracthl.py b/lib/extracthl.py index 0453954..a316739 100644 --- a/lib/extracthl.py +++ b/lib/extracthl.py @@ -32,11 +32,12 @@ #------Store highlighted texts with metadata------ class Anno(object): - def __init__(self,text,ctime=None,title=None,author=None,\ + def __init__(self,text,ctime=None,color=None,title=None,author=None,\ note_author=None,page=None,citationkey=None,tags=None): self.text=text self.ctime=ctime + self.color=color self.title=title self.author=author self.note_author=note_author @@ -56,8 +57,9 @@ def __repr__(self): Creation time: %s Paper title: %s Annotation author: %s Page: %s +Annotation color: %s Citation key: %s Tags: %s ''' %(self.text, self.ctime, self.title,\ - self.note_author, self.page, self.citationkey,\ + self.note_author, self.page, self.color, self.citationkey,\ diff --git a/lib/extracthl2.py b/lib/extracthl2.py index cd8117b..4d6fadc 100644 --- a/lib/extracthl2.py +++ b/lib/extracthl2.py @@ -33,6 +33,7 @@ from numpy import sqrt, argsort from subprocess import Popen, PIPE +from collections import Counter import tools import time import wordfix @@ -58,11 +59,12 @@ def checkPdftotext(): #------Store highlighted texts with metadata------ class Anno(object): - def __init__(self,text,ctime=None,title=None,author=None,\ + def 
__init__(self,text,ctime=None,color=None,title=None,author=None,\ note_author=None,page=None,citationkey=None,tags=None): self.text=text self.ctime=ctime + self.color=color self.title=title self.author=author self.note_author=note_author @@ -83,10 +85,11 @@ def __repr__(self): Paper title: %s Annotation author: %s Page: %s +Annotation color: %s Citation key: %s Tags: %s ''' %(self.text, self.ctime, self.title,\ - self.note_author, self.page, self.citationkey,\ + self.note_author, self.page, self.color, self.citationkey,\ ', '.join(self.tags)) reprstr=reprstr.encode('ascii','replace') @@ -591,6 +594,14 @@ def getCtime(annos,verbose=True): return ctimes[-1] +#----------------Get the color of annos---------------- +def getColor(annos): + '''Get the most common color for a list of annos + ''' + + colors = Counter([ii['color'] for ii in annos]) + most_common_color_code = colors.most_common(1)[0][0] + return tools.color_labels.get(most_common_color_code, most_common_color_code) @@ -645,6 +656,7 @@ def extractHighlights(filename,anno,verbose=True): #--------------Attach text with meta-------------- textjj=Anno(textjj,\ ctime=getCtime(anno.highlights[ii+1]),\ + color=getColor(anno.highlights[ii+1]),\ title=anno.meta['title'],\ page=ii+1,citationkey=anno.meta['citationkey'],\ tags=anno.meta['tags']) @@ -719,6 +731,7 @@ def extractHighlights2(filename,anno,verbose=True): #--------------Attach text with meta-------------- textjj=Anno(textjj,\ ctime=getCtime(annoii),\ + color=getColor(annoii),\ title=anno.meta['title'],\ page=ii+1,citationkey=anno.meta['citationkey'],\ tags=anno.meta['tags']) diff --git a/lib/extractnt.py b/lib/extractnt.py index 270a50f..04df3e4 100644 --- a/lib/extractnt.py +++ b/lib/extractnt.py @@ -11,6 +11,7 @@ Update time: 2016-04-12 22:09:38. 
''' +import tools #-----------------Extract notes----------------- def extractNotes(path,anno,verbose=True): @@ -35,7 +36,9 @@ def extractNotes(path,anno,verbose=True): for pp in anno.ntpages: for noteii in notes[pp]: + note_color=tools.color_labels.get(noteii['color'], noteii['color']) textjj=Anno(noteii['content'], ctime=noteii['cdate'],\ + color=note_color,\ title=meta['title'],\ page=pp,citationkey=meta['citationkey'], note_author=noteii['author'],\ tags=meta['tags']) diff --git a/lib/extracttags.py b/lib/extracttags.py index df39679..7ff7816 100644 --- a/lib/extracttags.py +++ b/lib/extracttags.py @@ -74,7 +74,7 @@ def exportAnno(annodict,outdir,action,verbose=True): os.remove(abpath_out) if verbose: - printHeader('Exporting all taged annotations to:',3) + printHeader('Exporting all tagged annotations to:',3) printInd(abpath_out,4) conv=lambda x:unicode(x) diff --git a/lib/tools.py b/lib/tools.py index 8ef8c2b..31ccbf5 100644 --- a/lib/tools.py +++ b/lib/tools.py @@ -6,7 +6,16 @@ import os import re - +color_labels = { + '#fff5ad': 'Yellow', + '#dcffb0': 'Green', + '#bae2ff': 'Blue', + '#d3c2ff': 'Purple', + '#ffc4fb': 'Pink', + '#ffb5b6': 'Red', + '#ffdeb4': 'Orange', + '#dbdbdb': 'Grey' +} def deu(text): if isinstance(text,str): diff --git a/menotexport.py b/menotexport.py index 6454c58..ed392ff 100644 --- a/menotexport.py +++ b/menotexport.py @@ -244,12 +244,14 @@ def getHighlights(db,results=None,folderid=None,foldername=None,filterdocid=None } where hl1={'rect': bbox, 'cdate': cdate, - 'page':pg} + 'page':pg, + 'color': color} note={'rect': bbox, 'author':author, 'content':txt, 'cdate': cdate, - 'page':pg} + 'page':pg, + 'color': color} Update time: 2016-02-24 00:36:33. 
''' @@ -426,6 +428,7 @@ def getNotes(db,results=None,folderid=None,foldername=None,filterdocid=None): FileNotes.author, FileNotes.note, FileNotes.modifiedTime, FileNotes.documentId, + FileNotes.color, DocumentFolders.folderid, Folders.name FROM Files @@ -443,7 +446,8 @@ def getNotes(db,results=None,folderid=None,foldername=None,filterdocid=None): FileNotes.x, FileNotes.y, FileNotes.author, FileNotes.note, FileNotes.modifiedTime, - FileNotes.documentId + FileNotes.documentId, + FileNotes.color FROM Files LEFT JOIN FileNotes ON FileNotes.fileHash=Files.hash @@ -474,8 +478,9 @@ def getNotes(db,results=None,folderid=None,foldername=None,filterdocid=None): txt = r[5] cdate = convert2datetime(r[6]) docid=r[7] + color=r[8] if filterdocid is None: - folder=r[9] + folder=r[10] else: folder=None @@ -483,7 +488,8 @@ def getNotes(db,results=None,folderid=None,foldername=None,filterdocid=None): 'author':author,\ 'content':txt,\ 'cdate': cdate,\ - 'page':pg\ + 'page':pg,\ + 'color':color\ } #------------Save to dict------------