WhiteCatFly
diff --git a/‎.DS_Store‎
10 KB b/‎.DS_Store‎
10 KB
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/README.md‎
Lines changed: 1 addition & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/README.md‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/creatKDFile.py‎
Lines changed: 35 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/creatKDFile.py‎
Lines changed: 35 additions & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/manage.py‎
Lines changed: 21 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/manage.py‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/search2.py‎
Lines changed: 17 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/search2.py‎
Lines changed: 17 additions & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/settings.py‎
Lines changed: 120 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/settings.py‎
Lines changed: 120 additions & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/similarity.py‎
Lines changed: 127 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/similarity.py‎
Lines changed: 127 additions & 0 deletions
diff --git a/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/urls.py‎
Lines changed: 29 additions & 0 deletions b/‎summer/2018202061ZHT/张晓康_段华欣_查寒天_“采蜂”古诗文白话翻译系统/urls.py‎
Lines changed: 29 additions & 0 deletions
@@ -0,0 +1 @@
+普通搜索可翻译出带有平行语料的古文，代码做了加强，勾选suggestion可翻译非平行语料
@@ -0,0 +1,35 @@
+import os
+import time
+import glob
+import json
+import jieba
+import pickle
+import gensim.models
+import numpy as np
+from scipy.linalg import norm
+
+
+model_file = '/home/zht/桌面/Models/word_model.mod'  # absolute, machine-specific path
+model = gensim.models.KeyedVectors.load(model_file)  # word vectors looked up by sentence_vector()
+
+
+with open('/home/zht/桌面/Models/paraData.pk', 'rb') as f:
+	paradatas = pickle.load(f)  # NOTE(review): pickle can run arbitrary code on load; only use trusted files
+	paradatas = [data for data in paradatas if len(data['fanyi']) > 2 or len(data['shangxi']) > 2]  # keep entries whose 'fanyi' or 'shangxi' value has length > 2
+
+def sentence_vector(s):
+	"""Return the unit-length sum of the word vectors of ``s``.
+
+	``s`` is segmented with ``jieba.lcut``; tokens that are not in the
+	module-level ``model`` are skipped.  When nothing contributes (empty
+	input or no known token) the 100-dimensional zero vector is returned.
+	"""
+	total = np.zeros(100)
+	for token in jieba.lcut(s):
+		if token in model:
+			total += model[token]
+	length = norm(total)
+	# A zero vector cannot be normalised; hand it back untouched.
+	if not length:
+		return total
+	return total / length
+
+def creatKd(data):
+    """Build a KD-tree over the sentence vectors of one corpus entry.
+
+    The entry's ``'fanyi'`` and ``'shangxi'`` strings are concatenated, split
+    on whitespace, and every piece is embedded with ``sentence_vector``.
+
+    Returns a ``scipy.spatial.KDTree`` with one 100-dimensional row per piece.
+    """
+    # KDTree was never imported in this module, so the original call raised
+    # NameError; import it locally, the way updateKDTree in similarity.py does.
+    from scipy.spatial import KDTree
+
+    texts = data['fanyi'] + data['shangxi']
+    vecs = [sentence_vector(s) for s in texts.split()]
+    # reshape keeps the array two-dimensional, (n, 100), for the tree.
+    return KDTree(np.array(vecs).reshape((len(vecs), 100)))
@@ -0,0 +1,21 @@
+#!/usr/bin/env python
+"""Django's command-line utility for administrative tasks."""
+import os
+import sys
+
+
+def main():
+    """Point Django at the project settings and run the requested command."""
+    os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'poemsearch.settings')
+    try:
+        from django.core.management import execute_from_command_line
+    except ImportError as exc:
+        hint = (
+            "Couldn't import Django. Are you sure it's installed and "
+            "available on your PYTHONPATH environment variable? Did you "
+            "forget to activate a virtual environment?"
+        )
+        raise ImportError(hint) from exc
+    else:
+        # Only reached when the import above succeeded.
+        execute_from_command_line(sys.argv)
+
+
+if __name__ == '__main__':
+    main()
@@ -0,0 +1,17 @@
+# -*- coding: utf-8 -*-
+
+from django.shortcuts import render
+from django.views.decorators import csrf
+
+def post_html(request):
+    """Render the search form page (``post.html``)."""
+    # Unlike the GET page, render_to_response cannot be used here: render()
+    # is required, otherwise the server does not set the csrf_token.
+    template = 'post.html'
+    return render(request, template)
+
+def search_post(request):
+    """Handle the search form and re-render ``post.html``.
+
+    On a POST request the template context gets a ``result`` entry; any other
+    request renders the page with an empty context.
+    """
+    context = {}
+    # Check the HTTP method rather than the truthiness of request.POST: a POST
+    # whose form data is empty yields an empty (falsy) QueryDict and would
+    # otherwise be handled as if nothing had been submitted.
+    if request.method == 'POST':
+        # Hard-coded sample rows exposed to the template as ``result``.
+        context['result'] = [['1','2'],['3','4'],['5','6'],['1','2'],['3','4'],['5','6'],['1','2'],['3','4'],['5','6']]
+    return render(request, 'post.html', context)
@@ -0,0 +1,120 @@
+"""
+Django settings for poemsearch project.
+
+Generated by 'django-admin startproject' using Django 2.2.2.
+
+For more information on this file, see
+https://docs.djangoproject.com/en/2.2/topics/settings/
+
+For the full list of settings and their values, see
+https://docs.djangoproject.com/en/2.2/ref/settings/
+"""
+
+import os
+
+# Build paths inside the project like this: os.path.join(BASE_DIR, ...)
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
+
+
+# Quick-start development settings - unsuitable for production
+# See https://docs.djangoproject.com/en/2.2/howto/deployment/checklist/
+
+# SECURITY WARNING: keep the secret key used in production secret!
+# The literal below is committed to version control, so it is only a
+# development fallback; deployments should supply DJANGO_SECRET_KEY.
+SECRET_KEY = os.environ.get(
+    'DJANGO_SECRET_KEY',
+    '69uskgz-r*l8#ks$=ck@hq_z5(jgk=%#gmy9voahr7azl#d$ms',
+)
+
+# SECURITY WARNING: don't run with debug turned on in production!
+# Still defaults to True; set DJANGO_DEBUG to 0/false/no/off to disable it.
+DEBUG = os.environ.get('DJANGO_DEBUG', 'true').strip().lower() not in ('0', 'false', 'no', 'off')
+
+# NOTE(review): '*' accepts every Host header, which makes the explicit
+# "127.0.0.1" entry redundant; restrict this list before deploying.
+ALLOWED_HOSTS = ["127.0.0.1",'*']
+
+
+# Application definition
+
+INSTALLED_APPS = [  # only Django's bundled contrib apps are listed; no project app is registered
+    'django.contrib.admin',
+    'django.contrib.auth',
+    'django.contrib.contenttypes',
+    'django.contrib.sessions',
+    'django.contrib.messages',
+    'django.contrib.staticfiles',
+]
+
+MIDDLEWARE = [  # the stock middleware list generated by startproject
+    'django.middleware.security.SecurityMiddleware',
+    'django.contrib.sessions.middleware.SessionMiddleware',
+    'django.middleware.common.CommonMiddleware',
+    'django.middleware.csrf.CsrfViewMiddleware',  # CSRF checks; the POST search form depends on the token
+    'django.contrib.auth.middleware.AuthenticationMiddleware',
+    'django.contrib.messages.middleware.MessageMiddleware',
+    'django.middleware.clickjacking.XFrameOptionsMiddleware',
+]
+
+ROOT_URLCONF = 'poemsearch.urls'  # module that defines urlpatterns
+
+TEMPLATES = [
+    {
+        'BACKEND': 'django.template.backends.django.DjangoTemplates',
+        'DIRS': [os.path.join(BASE_DIR, 'poemsearch/templates')],  # project-level template directory
+        'APP_DIRS': True,  # also look in each installed app's templates/ directory
+        'OPTIONS': {
+            'context_processors': [
+                'django.template.context_processors.debug',
+                'django.template.context_processors.request',
+                'django.contrib.auth.context_processors.auth',
+                'django.contrib.messages.context_processors.messages',
+            ],
+        },
+    },
+]
+
+WSGI_APPLICATION = 'poemsearch.wsgi.application'  # dotted path to the WSGI callable
+
+
+# Database
+# https://docs.djangoproject.com/en/2.2/ref/settings/#databases
+
+DATABASES = {
+    'default': {
+        'ENGINE': 'django.db.backends.sqlite3',
+        'NAME': os.path.join(BASE_DIR, 'db.sqlite3'),  # SQLite file located directly under BASE_DIR
+    }
+}
+
+
+# Password validation
+# https://docs.djangoproject.com/en/2.2/ref/settings/#auth-password-validators
+
+AUTH_PASSWORD_VALIDATORS = [  # Django's four stock password validators, each with default options
+    {
+        'NAME': 'django.contrib.auth.password_validation.UserAttributeSimilarityValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.MinimumLengthValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.CommonPasswordValidator',
+    },
+    {
+        'NAME': 'django.contrib.auth.password_validation.NumericPasswordValidator',
+    },
+]
+
+
+# Internationalization
+# https://docs.djangoproject.com/en/2.2/topics/i18n/
+
+LANGUAGE_CODE = 'en-us'  # NOTE(review): the README is written in Chinese; confirm 'en-us' is intended
+
+TIME_ZONE = 'UTC'
+
+USE_I18N = True
+
+USE_L10N = True
+
+USE_TZ = True  # datetimes are handled as timezone-aware values
+
+
+# Static files (CSS, JavaScript, Images)
+# https://docs.djangoproject.com/en/2.2/howto/static-files/
+
+STATIC_URL = '/static/'  # URL prefix used when referring to static files
@@ -0,0 +1,127 @@
+import os
+import time
+import glob
+import json
+import jieba
+import pickle
+import gensim.models
+import numpy as np
+from scipy.linalg import norm
+from sklearn.svm import SVC
+
+model_file = '/home/zht/桌面/Models/word_model.mod'  # absolute, machine-specific path
+model = gensim.models.KeyedVectors.load(model_file)  # word-level vectors used by sentence_vector()
+
+singleword_file = '/home/zht/桌面/poemsearch/poemsearch/singleword.model'
+singleword_model = gensim.models.KeyedVectors.load(singleword_file)  # per-character vectors used by Sentence2VectorBySinglewordModel()
+
+paradatas = []  # entries are read through the keys 'name', 'content' and 'kd' below
+nonparadatas = []  # entries are read through the keys 'name' and 'content' below
+
+paradatas_vecs = []  # presumably one vector per paradatas entry, same order; verify against the cache builder
+nonparadatas_vecs = []  # svmSearch indexes nonparadatas by row position in this list
+
+# Load data from cache; the empty placeholders above are all overwritten here. NOTE(review): pickle.load can run arbitrary code, so these files must be trusted.
+with open('/home/zht/桌面/poemsearch/finalKdtree.pk', 'rb') as f:
+	paradatas = pickle.load(f)
+
+with open('/home/zht/桌面/poemsearch/poemsearch/paradatas_vecs.pk','rb') as f:
+	paradatas_vecs = pickle.load(f)
+
+with open('/home/zht/桌面/Models/nonparaData.pk','rb') as f:
+	nonparadatas = pickle.load(f)
+
+with open('/home/zht/桌面/poemsearch/poemsearch/nonparadatas_vecs.pk','rb') as f:
+	nonparadatas_vecs = pickle.load(f)
+
+
+def sentence_vector(s):
+	"""Embed ``s`` as the normalised sum of its word vectors.
+
+	The text is cut into words with ``jieba.lcut``; words unknown to the
+	module-level ``model`` contribute nothing.  An all-zero sum is returned
+	as is, since it cannot be scaled to unit length.
+	"""
+	accumulated = np.zeros(100)
+	known = (model[token] for token in jieba.lcut(s) if token in model)
+	for vector in known:
+		accumulated += vector
+	magnitude = norm(accumulated)
+	if magnitude:
+		return accumulated / magnitude
+	return accumulated
+
+
+
+def search(target = '夏天的荷花'):
+	"""Rank every entry of ``paradatas`` by closeness to ``target``.
+
+	An entry's score is the sum of the distances from the query's sentence
+	vector to its 3 nearest neighbours in the entry's ``'kd'`` tree.
+	Returns ``(index, score)`` pairs sorted by ascending score.
+	"""
+	query_vec = sentence_vector(target)
+	ranked = []
+	for position, entry in enumerate(paradatas):
+		distances, _ = entry['kd'].query(query_vec, 3)
+		ranked.append((position, sum(distances)))
+	# list.sort is stable, like sorted(), so ties keep corpus order.
+	ranked.sort(key=lambda pair: pair[1])
+	return ranked
+
+
+def show(n, indexs):
+	"""Return ``(name, content)`` of the first ``n`` ranked parallel entries.
+
+	``indexs`` is a sequence of ``(index, score)`` pairs whose index selects
+	an entry of ``paradatas``.  ``n`` outside the range 0..9 is replaced by
+	10, so at most 10 results are returned.
+	"""
+	# Same clamp as svmSearch: a negative n used to slip through and made the
+	# slice drop entries from the end instead of limiting the count.
+	n = n if 0 <= n < 10 else 10
+	return [
+		(paradatas[index]['name'], paradatas[index]['content'])
+		for index, _ in indexs[:n]
+	]
+
+def svmSearch(n,indexs):
+	"""Suggest non-parallel entries that resemble the best parallel matches.
+
+	``indexs`` is a ranked sequence of ``(index, score)`` pairs into
+	``paradatas_vecs``.  The vectors of the first 3 pairs are labelled 1 and
+	those of the last 20 pairs are labelled 0; an SVC trained on them then
+	scores every row of ``nonparadatas_vecs``.
+
+	Returns up to ``n`` ``(name, content)`` tuples from ``nonparadatas``
+	(``n`` outside 0..9 is replaced by 10).
+	"""
+	positives = [paradatas_vecs[index] for index, _ in indexs[:3]]
+	negatives = [paradatas_vecs[index] for index, _ in indexs[-20:]]
+	# Size the labels from the slices: the fixed [1]*3+[0]*20 list did not
+	# match the sample count when fewer than 23 ranked pairs were supplied,
+	# which made fit() raise.  NOTE(review): with fewer than 23 pairs the two
+	# slices overlap, so some vectors carry both labels.
+	tags = [1] * len(positives) + [0] * len(negatives)
+	clf = SVC(gamma='auto', probability=True)
+	clf.fit(positives + negatives, tags)
+	# Column 0 of predict_proba is the probability of label 0; sorting it in
+	# ascending order puts the rows least likely to be negatives first.
+	probabilities = clf.predict_proba(nonparadatas_vecs)
+	ranked = sorted(enumerate(p[0] for p in probabilities), key=lambda pair: pair[1])
+	n = n if 0 <= n < 10 else 10
+	return [
+		(nonparadatas[index]['name'], nonparadatas[index]['content'])
+		for index, _ in ranked[:n]
+	]
+
+def findFirstMoreVal(array, key):
+	'''
+	Return the index of the first element of ``array`` that is greater than
+	``key``.
+
+	``array`` must be sorted in ascending order.  When no element is greater
+	than ``key`` (including an empty ``array``) the result is ``len(array)``.
+	'''
+	# bisect_right performs exactly this search.  The hand-written loop mixed
+	# a half-open [lo, hi) range with ``hi = mid - 1``, which skipped
+	# candidates (e.g. [1, 3, 5] with key 2 gave 0) and could return -1.
+	from bisect import bisect_right
+	return bisect_right(array, key)
+
+def m3sigma(ps):
+	"""Return the mean minus three standard deviations of the scores in ``ps``.
+
+	``ps`` is a sequence of pairs whose second element is the score; the
+	last 100 pairs are left out of the statistics.
+	"""
+	kept = ps[:-100]
+	values = [pair[1] for pair in kept]
+	return np.mean(values) - 3 * np.std(values)
+
+
+
+
+
+def Sentence2VectorBySinglewordModel(sentence):
+	'''
+	Embed ``sentence`` character by character with ``singleword_model``.
+
+	Whitespace is discarded, the vectors of all known characters are summed,
+	and the sum is scaled to unit length.  A 100-dimensional zero vector is
+	returned when no character is known to the model.
+	'''
+	total = np.zeros(100)
+	for piece in sentence.split():
+		for char in piece:
+			if char in singleword_model:
+				total += singleword_model[char]
+	length = norm(total)
+	if not length:
+		return total
+	return total / length
+
+
+
+def updateKDTree(item, vectors):
+	'''Rebuild ``item['kd']`` with ``vectors`` appended to its points.
+
+	``item`` is a dict whose ``'kd'`` value is a ``scipy.spatial.KDTree``;
+	``vectors`` is an iterable of rows with the tree's dimensionality.  The
+	dict is updated in place and nothing is returned.
+	'''
+	from scipy.spatial import KDTree
+	# Stack everything in one call: calling vstack once per vector copied the
+	# whole, growing array on every iteration.
+	stacked = np.vstack([item['kd'].data, *vectors])
+	item['kd'] = KDTree(stacked)
@@ -0,0 +1,29 @@
+"""poemsearch URL Configuration
+
+The `urlpatterns` list routes URLs to views. For more information please see:
+    https://docs.djangoproject.com/en/2.2/topics/http/urls/
+Examples:
+Function views
+    1. Add an import:  from my_app import views
+    2. Add a URL to urlpatterns:  path('', views.home, name='home')
+Class-based views
+    1. Add an import:  from other_app.views import Home
+    2. Add a URL to urlpatterns:  path('', Home.as_view(), name='home')
+Including another URLconf
+    1. Import the include() function: from django.urls import include, path
+    2. Add a URL to urlpatterns:  path('blog/', include('blog.urls'))
+"""
+from django.contrib import admin
+from django.urls import path
+from django.conf.urls import url
+from django.views.static import serve
+from . import view, search2
+
+urlpatterns = [  # NOTE(review): every view below comes from `view`; the imported `search2` module is not routed here
+    path('admin/', admin.site.urls),
+    # url(r'^hello$', view.hello),
+    url(r'^get\.html$', view.get_html),
+    url(r'^get$', view.get),
+    url(r'^search-post',view.search_post),  # NOTE(review): no trailing '$', so any path starting with "search-post" matches
+    url(r'^book/(?P<path>.*)$', serve, {'document_root': '/home/zht/桌面/poemsearch/poemsearch/templates'})  # serves files straight from this absolute, machine-specific directory
+]
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+普通搜索可翻译出带有平行语料的古文，代码做了加强，勾选suggestion可翻译非平行语料`