-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathapp.properties
More file actions
42 lines (42 loc) · 1.81 KB
/
app.properties
File metadata and controls
42 lines (42 loc) · 1.81 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
# the directory to process
DATA_DIR=/dir/to/data/set/here
# the directory for training documents; must be within DATA_DIR
TRAIN_DIR=source-documents/
# the directory for test documents. must be within DATA_DIR
TEST_DIR=suspicious-documents/
# output directory where the results are saved
RESULTS_DIR=results/
# the number of sentence passages retrieved for each suspicious document during
# the candidate retrieval phase
RETRIEVAL_COUNT=150
# The graph edit distance threshold which determines whether two sentences are considered plagiarised
PLAGIARISM_THRESHOLD=0.3
# The merge distance which determines whether or not two adjacent passages
# should be merged during post-processing
MERGE_DISTANCE=1500
# The command-line parameters passed to MaltParser
MALT_PARAMS=-c engmalt.linear-1.7.mco -m parse -w . -lfi parser.log
# The parameter passed to the Stanford POS tagger (a tagger model file name)
POSTAGGER_PARAMS=english-left3words-distsim.tagger
# The number of threads used for POS-tagging during pre-processing
POSTAGGER_THREADS=10
# The number of threads used for dependency parsing during pre-processing
MALTPARSER_THREADS=10
# The number of threads used for building the index
INDEX_BUILDER_THREADS=26
# The directory where the candidate retrieval index is saved
INDEX_DIR=lucene/
# The number of threads used for the candidate retrieval phase
SENTENCE_RETRIEVAL_THREADS=25
# The number of threads used for the detailed analysis phase
PLAGIARISM_SEARCHER_THREADS=30
# Path to a CSV file containing POS-tag substitution weights
POS_SUB_FILE=resources/pos_sub.csv
# Path to a CSV file containing POS-tag insert and delete weights
POS_INSDEL_FILE=resources/pos_insdel.csv
# Path to a CSV file containing deprel insert and delete weights
DEPREL_INSDEL_FILE=resources/deprel_insdel.csv
# The name of the mongodb database
DBNAME=nlp-graphs
# Location of the mongodb database
DBLOCATION=localhost