Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
42 commits
Select commit Hold shift + click to select a range
b61afd0
Ignore eclipse project files
rxuriguera Mar 28, 2011
85fc84f
Added realtime search module
rxuriguera Mar 31, 2011
d8379d4
Added .gitignore
rxuriguera Mar 31, 2011
50faee2
Added a Firefox 4 user agent
rxuriguera Mar 31, 2011
73b5ca4
Cleaned up realtime search
rxuriguera Apr 4, 2011
35587fa
Add the ability to extract keywords
rxuriguera Apr 4, 2011
e7d2b31
Make the results more usable by representinig keywords as a list
rxuriguera Apr 4, 2011
0a6a6a7
Udated version number and description
Apr 7, 2011
3f8e429
Removed todo and unused import
Apr 7, 2011
d0f55b3
Changed the way new urls are computed
Apr 8, 2011
3a19c3f
Increased version number
Apr 8, 2011
b41ff4e
quick fix to work with google.com and google.fr
flepied Sep 16, 2011
963a1b8
fix title, url and description extraction
flepied Sep 16, 2011
089e94e
my goal
caozhzh Apr 9, 2013
9c09c45
cp search.py to GeneralSearch.py and change GoogleSearch to GeneralSe…
caozhzh Apr 9, 2013
8baff04
add engine
caozhzh Apr 9, 2013
74ba510
GeneralSearch support baidu,but the code is so ugly to be continue
caozhzh Apr 10, 2013
b4be517
lambda within engin
caozhzh Apr 10, 2013
6318c4a
autosearch.py added to handle command line options and ini
caozhzh Apr 10, 2013
9bbef61
GeneralSearch.conf added
caozhzh Apr 11, 2013
d04c1a1
add google and baidu result conf item
caozhzh Apr 12, 2013
abe2235
multi search engine basiclly ok
caozhzh Apr 12, 2013
291888d
_page change but not tested
caozhzh Apr 13, 2013
f748d9e
autosearch opt ok basiclly
caozhzh Apr 14, 2013
d85770b
use keyword opt
caozhzh Apr 14, 2013
608c19f
keyword and interval ok, try PyCha to chart
caozhzh Apr 15, 2013
0569a1a
autosearch engine, keyword, internal, num option basiclly ok
caozhzh Apr 16, 2013
f052c70
write to file and chart basiclly ok
caozhzh Apr 16, 2013
c9e4881
new result must be ignored
caozhzh Apr 16, 2013
5744cad
new gitignore
caozhzh Apr 16, 2013
c5b0dd7
not ignored but why?
caozhzh Apr 16, 2013
b73f45c
i dont know how to set in gitignore
caozhzh Apr 16, 2013
5106df8
del *.output
caozhzh Apr 16, 2013
56155d6
encoding question ok almostly
caozhzh Apr 18, 2013
e90e5b3
weibo and tq need use api
caozhzh Apr 18, 2013
d6b3492
autosearch.conf try tq and weibo
caozhzh Apr 19, 2013
d06ebdf
try to solve chinese in google in windows
caozhzh Apr 20, 2013
dc2659b
search more bug fixed, and chinese fanti not only in windows
caozhzh Apr 20, 2013
8b7a955
chart in windows ok,but ugly
caozhzh Apr 24, 2013
d0696b0
readme.txt change
caozhzh Sep 6, 2013
e22b80b
Merge branch 'master' of https://github.com/caozhzh/xgoogle into Gene…
kenorb Aug 1, 2014
cdf5e67
Merging README.md from master.
kenorb Aug 1, 2014
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,3 +6,5 @@ xgoogle.egg-info
.project
.pydevproject
.settings
*.output
output.*
6 changes: 6 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,11 @@
xgoogle
=======

Python wrapper to Google Search service.

This is a command-line search tool designed to fetch search results
from several popular search engines, including Google, Baidu, Weibo, Qihoo, etc.

Provide a wrapper for the following services:
* Google Search
* Google Translate
Expand Down
108 changes: 108 additions & 0 deletions autosearch.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
[google]
PAGE_MODE=1
SEARCH_URL=http://www.google.com.hk/search?q=%(query)s&start=%(num)d

SEARCH_URL_0=http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&btnG=Google+Search
NEXT_PAGE_0=http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&start=%(start)d
SEARCH_URL_1=http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&btnG=Google+Search
NEXT_PAGE_1=http://www.google.%(tld)s/search?hl=%(lang)s&q=%(query)s&num=%(num)d&start=%(start)d

page_nums=10

total_tag=div
total_tag_filter={'id': 'resultStats'}

result_tag=li
result_tag_filter={'class': 'g'}
title_tag=a
title_tag_filter={}
desc_tag=span
desc_tag_filter={'class': 'st'}

[baidu]
PAGE_MODE=1
SEARCH_URL=http://www.baidu.com/s?wd=%(query)s&pn=%(num)d&ie=utf-8

page_nums=10

total_tag=span
total_tag_filter={'class': 'nums'}

result_tag=table
result_tag_filter={'class': 'result'}
title_tag=a
title_tag_filter={}
desc_tag=font
desc_tag_filter={}

[qihoo]
PAGE_MODE=2
SEARCH_URL=http://qihoo.com/wenda.php?kw=%(query)s&page=%(num)d

page_nums=10

total_tag=em
total_tag_filter={'id': 'search-result'}

result_tag=dl
result_tag_filter={}
title_tag=a
title_tag_filter={}
desc_tag=dd
desc_tag_filter={'class': 'content'}

encoding=gb2312

[maopu]
PAGE_MODE=1
SEARCH_URL=http://www.baidu.com/s?wd=site:mop.com+%(query)s&pn=%(num)d&ie=utf-8

page_nums=10

total_tag=span
total_tag_filter={'class': 'nums'}

result_tag=table
result_tag_filter={'class': 'result'}
title_tag=a
title_tag_filter={}
desc_tag=font
desc_tag_filter={}

[tianya]
PAGE_MODE=2
SEARCH_URL=http://www.tianya.cn/search/bbs?q=%(query)s&pn=%(num)d

page_nums=10

total_tag=em
total_tag_filter={}

result_container_tag=div
result_container_tag_filter={'class': 'searchListOne'}
result_tag=li
result_tag_filter={}
title_tag=a
title_tag_filter={}
desc_tag=p
desc_tag_filter={}

[weibo]

[tq]
PAGE_MODE=2
SEARCH_URL=http://search.t.qq.com/index.php?k=%(query)s&p=%(num)d

page_nums=15

total_tag=span
total_tag_filter={'class': 'tabNum left'}

result_container_tag=ul
result_container_tag_filter={'id': 'talkList'}
result_tag=li
result_tag_filter={}
title_tag=a
title_tag_filter={'ctype': '2'}
desc_tag=div
desc_tag_filter={'class': 'msgCnt'}
198 changes: 198 additions & 0 deletions autosearch.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,198 @@
#!/bin/python
## coding=utf-8 ##
#-*- encoding: utf8 -*-

### handle options
import sys, getopt, re, os

# Option defaults. opt() overwrites these module globals from the command line.

# Search query (-k); the script refuses to run while this is empty.
KEYWORD=""
# Engines selected with -e; at least one is required.
ENGINE=[]
# Seconds to wait between search rounds (-i); 0 means run one round and exit.
INTERNAL=0
# Maximum number of results written per engine and round (-n).
NUM=10
# Preference file handed to GeneralSearch (-p); a relative path, so it is
# resolved against the current working directory. The commented-out line
# below would resolve it next to this script instead.
#PREFERENCE=os.path.join(os.path.dirname(os.path.abspath(__file__)), 'autosearch.conf')
PREFERENCE='autosearch.conf'
# Result attributes written for every hit, in this order (-o).
FORMAT=['title', 'url', 'desc']
# Output file (-w); emptied at startup, then appended to by try_output().
WRITE="autosearch.output"
# Stored by opt() for -f; not read anywhere else in this file.
FILTER=""
# Any non-empty value (-c) makes try_chart() render a chart.
CHART=""
# Stored by opt() for -s; not read anywhere else in this file.
SORT=""
# When True (-v), try_output() also prints to the screen.
VERBOSE=False

# Names accepted by -e.
# NOTE(review): presumably each name is a section of the preference file - confirm in GeneralSearch.
ENGINES=['google', 'baidu', 'qihoo', 'maopu', 'tianya', 'weibo', 'tq']
# Attribute names accepted by -o.
FORMATS=['title', 'url', 'desc']
# Chart input: one [engine, [[x, result_count], ...]] entry per selected engine.
DATA=[]

# Encoding used to decode KEYWORD before it is passed to GeneralSearch.
ENCODING = sys.getfilesystemencoding()

def opt():
    """Parse sys.argv and store the chosen options in the module globals.

    Unsupported engine or format names are reported on stderr and skipped,
    as the warning text promises; the lists are rebuilt rather than edited
    while being iterated. DATA is rebuilt so that DATA[i] always belongs to
    ENGINE[i], even when -e is given more than once.

    Raises:
        getopt.GetoptError: unknown option or missing option argument.
        ValueError: -i or -n was given a non-numeric value.
    """
    global KEYWORD, ENGINE, INTERNAL, NUM, PREFERENCE, FORMAT, WRITE, FILTER, CHART, SORT, VERBOSE

    blank = re.compile(r'\s')

    def supported_items(raw, allowed, template):
        # Split a comma separated value, keep the allowed names and warn
        # about the rest; empty entries (e.g. a trailing comma) are dropped.
        kept = []
        for item in blank.sub('', raw).split(','):
            if item in allowed:
                kept.append(item)
            elif item:
                sys.stderr.write((template % item) + "\n")
        return kept

    opts, args = getopt.getopt(
        sys.argv[1:], "k:e:i:n:p:o:w:f:c:s:vh",
        ["keyword=", "engine=", "internal=", "num=", "preference=", "format=",
         "write=", "filter=", "chart=", "sort=", "verbose", "help"])

    for op, value in opts:
        if op in ("-k", "--keyword"):
            KEYWORD = value.strip()
        elif op in ("-e", "--engine"):
            ENGINE = supported_items(
                value, ENGINES, "warning: %s is not supported, ignored")
            # Slice assignment mutates the shared list in place, so no
            # 'global DATA' is needed and other references stay valid.
            DATA[:] = [[name, [[0, 0]]] for name in ENGINE]
        elif op in ("-i", "--internal"):
            INTERNAL = float(value)
        elif op in ("-n", "--num"):
            NUM = int(value)
        elif op in ("-p", "--preference"):
            PREFERENCE = value
        elif op in ("-o", "--format"):
            chosen = supported_items(
                value, FORMATS, "warning: format %s is not supported, ignored")
            # Keep the previous format when nothing valid was given, so the
            # output never silently loses every field.
            if chosen:
                FORMAT = chosen
        elif op in ("-w", "--write"):
            WRITE = value
        elif op in ("-f", "--filter"):
            FILTER = value
        elif op in ("-c", "--chart"):
            CHART = value
        elif op in ("-s", "--sort"):
            SORT = value
        elif op in ("-v", "--verbose"):
            VERBOSE = True
        elif op in ("-h", "--help"):
            usage()
            sys.exit()
        else:
            # getopt rejects unknown options itself; this is only a safety net.
            sys.stderr.write("warning: option %s not recognized, ignored\n" % op)

def usage():
    """Print the command-line help text to standard output.

    -c/--chart and -s/--sort are shown with a value because opt() declares
    them to getopt as "c:"/"s:" ("chart="/"sort="); given without a value
    they are rejected with "option requires argument".
    """
    print("""autosearch is an automatically tools used in command line, the usage:""")
    print("""autosearch
-k,--keyword=keyword ---if have blank,put the keyword in "",like this "cars tree"
-e,--engine=google,baidu,qihoo,maopu,tianya,weibo,tq ---at least one of these search engines
-i,--internal=NUMBER ---seconds, default is 0
-n,--num=NUMBER ---topmost search results, default is 10
-p,--preference=FILE ---set preference file name, default is autosearch.conf
-o,--format=title,url,desc ---set output formate, default is title,url,desc
-w,--write=FILE ---set output file, default is autosearch.output
-f,--filter=STRING ---set filter string
-c,--chart=STRING ---got output chart
-s,--sort=STRING ---set sort type
-v,--verbose ---if set, print the output to screen also
-h,--help ---this help information""")

def error(msg):
    """Report a fatal problem, show the usage text and terminate.

    Args:
        msg: text printed between two rows of asterisks.

    Raises:
        SystemExit: always, with exit status 1 so that shells and calling
            scripts can tell the run failed (a bare sys.exit() would report
            success).
    """
    banner = 50 * '*'
    print(banner)
    print("*** %s" % msg)
    print(banner)
    usage()
    sys.exit(1)

def try_output(content):
    """Emit one line of output according to the VERBOSE and WRITE settings.

    Prints content when VERBOSE is set, and appends it (UTF-8 encoded,
    followed by a newline) to the file named by WRITE when WRITE is
    non-empty.
    """
    if VERBOSE:
        print(content)
    if not WRITE:
        return
    with open(WRITE, 'a+') as sink:
        sink.write(content.encode('utf-8'))
        sink.write("\n")

import cairo
import pycha
import pycha.bar
def try_chart():
    """Render DATA as a vertical bar chart into 'output.png'.

    Does nothing while CHART is the empty string. The picture is always
    500x400 pixels and always written to 'output.png'; the value of CHART
    only switches charting on.
    """
    if CHART != '':
        width, height = 500, 400
        canvas = cairo.ImageSurface(cairo.FORMAT_ARGB32, width, height)
        bars = pycha.bar.VerticalBarChart(canvas)
        bars.addDataset(DATA)
        bars.render()
        canvas.write_to_png('output.png')

# Parse the command line. error() prints the usage text and exits by itself,
# so nothing further is needed after reporting a parsing failure.
try:
    opt()
except Exception as exc:
    error("error: %s" % exc)

# Refuse to start without a query, an engine and a readable preference file.
if KEYWORD == '':
    error("error: no keyword to query, exit!!!")

if not ENGINE:
    error("error: must assign at least 1 search engine!!!")

if not os.path.exists(PREFERENCE):
    error("error: preference file %s not exists!!!" % PREFERENCE)

# Start from an empty output file; try_output() appends to it afterwards.
if WRITE:
    with open(WRITE, 'w'):
        pass

### main logic
from xgoogle.GeneralSearch import GeneralSearch
import time
import string
import time

# Poll every selected engine, record its hit count and top results, then
# chart the collected counts. With a non-zero INTERNAL the whole round is
# repeated after that many seconds; otherwise the script exits after one round.

# x coordinate of the next chart sample; advanced by INTERNAL every round.
start_time = 1
# The keyword is decoded once instead of on every use inside the loop.
keyword = KEYWORD.decode(ENCODING)

while True:
    # enumerate() keeps the DATA slot correct even if an engine is listed
    # twice, where ENGINE.index() would always return the first match.
    for index, engine in enumerate(ENGINE):
        gs = GeneralSearch(keyword, engine, PREFERENCE)
        results = gs.get_results()
        print(gs.num_results)

        try_output("%s: %d results of \"%s\" --- %s" % (
            engine.upper(), gs.num_results, keyword,
            time.strftime("%Y-%m-%d %X", time.localtime())))
        try_output(80 * '-')

        if index < len(DATA) and DATA[index][0] == engine:
            DATA[index][1].append([start_time, gs.num_results])

        count = 1
        # Stop on an empty page as well: without that check the loop would
        # spin forever once the engine has fewer than NUM results.
        while results and count <= NUM:
            for r in results:
                if count > NUM:
                    break
                try_output("results[%d]: " % count)
                for k in FORMAT:
                    # getattr() reads the attribute directly; no eval() of
                    # a string built from option values.
                    try_output("%s" % getattr(r, k))
                try_output(80 * '+')
                count += 1
            if count <= NUM:
                results = gs.get_results()

        del gs

    try_chart()

    if INTERNAL:
        # %g keeps fractional intervals such as 0.5 readable (%d showed 0).
        print("info: loop search every %g seconds, press CTRL+C to exit." % INTERNAL)
        # opt() already stores INTERNAL as a float.
        time.sleep(INTERNAL)
        start_time += INTERNAL
    else:
        sys.exit()
Loading