From eea312e256c61a2f957b3e630bed7c0d32b6b8fb Mon Sep 17 00:00:00 2001 From: m Date: Mon, 14 Mar 2016 09:37:35 +0100 Subject: [PATCH 1/3] Corrections to make test/test.rb work --- lib/{google-refine.rb => refine.rb} | 238 ++++++++++++++-------------- test/{dates.txt => dates.csv} | 0 test/test.rb | 2 +- 3 files changed, 120 insertions(+), 120 deletions(-) rename lib/{google-refine.rb => refine.rb} (94%) rename test/{dates.txt => dates.csv} (100%) diff --git a/lib/google-refine.rb b/lib/refine.rb similarity index 94% rename from lib/google-refine.rb rename to lib/refine.rb index 56f501a..f922645 100644 --- a/lib/google-refine.rb +++ b/lib/refine.rb @@ -1,120 +1,120 @@ -require 'httpclient' -require 'cgi' -require 'json' - -class Refine - attr_reader :project_name - attr_reader :project_id - - def self.get_all_project_metadata(server="http://127.0.0.1:3333") - uri = "#{server}/command/core/get-all-project-metadata" - response = HTTPClient.new(server).get(uri) - JSON.parse(response.body) - end - - def initialize(opts = {}) - @server = opts["server"] || "http://127.0.0.1:3333" - @throws_exceptions = opts["throws_exceptions"] || true - - if opts["file_name"] && !opts["file_name"].empty? && opts["project_name"] && !opts["project_name"].empty? 
- project_name = CGI.escape(opts["project_name"]) - @project_id = create_project(project_name, opts["file_name"]) - @project_name = project_name if @project_id - else - @project_id = opts["project_id"] - - metadata = self.get_project_metadata - @project_name = CGI.escape(metadata["name"]) - end - end - - def create_project(project_name, file_name) - uri = @server + "/command/core/create-project-from-upload" - project_id = false - File.open(file_name) do |file| - body = { - 'project-file' => file, - 'project-name' => project_name - } - response = client.post(uri, body) - url = response.header['Location'] - unless url == [] - project_id = CGI.parse(url[0].split('?')[1])['project'][0] - end - end - raise "Error creating project: #{response}" unless project_id - project_id - end - - def apply_operations(file_name_or_string) - if File.exists?(file_name_or_string) - operations = File.read(file_name_or_string) - else - operations = file_name_or_string - end - - call('apply-operations', 'operations' => file_name_or_string) - end - - def export_rows(opts={}) - format = opts["format"] || 'tsv' - uri = @server + "/command/core/export-rows/#{@project_name}.#{format}" - - body = { - 'engine' => { - "facets" => opts["facets"] || [], - "mode" => "row-based" - }.to_json, - 'options' => opts["options"] || '', - 'project' => @project_id, - 'format' => format - } - - @response = client.post(uri, body) - @response.content - end - - def delete_project - uri = @server + "/command/core/delete-project" - body = { - 'project' => @project_id - } - @response = client.post(uri, body) - JSON.parse(@response.content)['code'] rescue false - end - - # this pattern is pulled from mailchimp/mailchimp-gem - - def call(method, params = {}) - uri = "#{@server}/command/core/#{method}" - params = { "project" => @project_id }.merge(params) - - response = if method.start_with?('get-') - client.get(uri, params) - else - client.post(uri, params) - end - - begin - response = JSON.parse(response.body) - 
rescue - response = JSON.parse('[' + response.body + ']').first - end - - if @throws_exceptions && response.is_a?(Hash) && response["code"] && response["code"] == "error" - raise "API Error: #{response}" - end - - response - end - - def method_missing(method, *args) - # translate: get_column_info --> get-column-info - call(method.to_s.gsub('_', '-'), *args) - end - - protected - def client - @client ||= HTTPClient.new(@server) - end +require 'httpclient' +require 'cgi' +require 'json' + +class Refine + attr_reader :project_name + attr_reader :project_id + + def self.get_all_project_metadata(server="http://127.0.0.1:3333") + uri = "#{server}/command/core/get-all-project-metadata" + response = HTTPClient.new(server).get(uri) + JSON.parse(response.body) + end + + def initialize(opts = {}) + @server = opts["server"] || "http://127.0.0.1:3333" + @throws_exceptions = opts["throws_exceptions"] || true + + if opts["file_name"] && !opts["file_name"].empty? && opts["project_name"] && !opts["project_name"].empty? 
+ project_name = CGI.escape(opts["project_name"]) + @project_id = create_project(project_name, opts["file_name"]) + @project_name = project_name if @project_id + else + @project_id = opts["project_id"] + + metadata = self.get_project_metadata + @project_name = CGI.escape(metadata["name"]) + end + end + + def create_project(project_name, file_name) + uri = @server + "/command/core/create-project-from-upload" + project_id = false + File.open(file_name) do |file| + body = { + 'project-file' => file, + 'project-name' => project_name + } + response = client.post(uri, body) + url = response.header['Location'] + unless url == [] + project_id = CGI.parse(url[0].split('?')[1])['project'][0] + end + end + raise "Error creating project: #{response}" unless project_id + project_id + end + + def apply_operations(file_name_or_string) + if File.exists?(file_name_or_string) + operations = File.read(file_name_or_string) + else + operations = file_name_or_string + end + + call('apply-operations', 'operations' => operations) + end + + def export_rows(opts={}) + format = opts["format"] || 'tsv' + uri = @server + "/command/core/export-rows/#{@project_name}.#{format}" + + body = { + 'engine' => { + "facets" => opts["facets"] || [], + "mode" => "row-based" + }.to_json, + 'options' => opts["options"] || '', + 'project' => @project_id, + 'format' => format + } + + @response = client.post(uri, body) + @response.content + end + + def delete_project + uri = @server + "/command/core/delete-project" + body = { + 'project' => @project_id + } + @response = client.post(uri, body) + JSON.parse(@response.content)['code'] rescue false + end + + # this pattern is pulled from mailchimp/mailchimp-gem + + def call(method, params = {}) + uri = "#{@server}/command/core/#{method}" + params = { "project" => @project_id }.merge(params) + + response = if method.start_with?('get-') + client.get(uri, params) + else + client.post(uri, params) + end + + begin + response = JSON.parse(response.body) + rescue + 
response = JSON.parse('[' + response.body + ']').first + end + + if @throws_exceptions && response.is_a?(Hash) && response["code"] && response["code"] == "error" + raise "API Error: #{response}" + end + + response + end + + def method_missing(method, *args) + # translate: get_column_info --> get-column-info + call(method.to_s.gsub('_', '-'), *args) + end + + protected + def client + @client ||= HTTPClient.new(@server) + end end \ No newline at end of file diff --git a/test/dates.txt b/test/dates.csv similarity index 100% rename from test/dates.txt rename to test/dates.csv diff --git a/test/test.rb b/test/test.rb index 0fa18ad..74707ff 100644 --- a/test/test.rb +++ b/test/test.rb @@ -1,6 +1,6 @@ load '../lib/refine.rb' -prj = Refine.new('date cleanup', 'dates.txt') +prj = Refine.new({ "project_name" => 'date cleanup', "file_name" => 'dates.csv' }) prj.apply_operations('operations.json') puts prj.export_rows('csv') prj.delete_project \ No newline at end of file From b7d975ea9a7209045662f6d04ebbc3c5ae307476 Mon Sep 17 00:00:00 2001 From: m Date: Mon, 14 Mar 2016 10:50:48 +0100 Subject: [PATCH 2/3] Add some tests --- test/dates.txt | 4 ++++ test/operations.json | 50 ++++++++++++++++++++++---------------------- test/test_refine.rb | 35 +++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+), 25 deletions(-) create mode 100644 test/dates.txt create mode 100644 test/test_refine.rb diff --git a/test/dates.txt b/test/dates.txt new file mode 100644 index 0000000..e6d9e07 --- /dev/null +++ b/test/dates.txt @@ -0,0 +1,4 @@ +Date +7 December 2001 +July 1 2002 +10/20/10 \ No newline at end of file diff --git a/test/operations.json b/test/operations.json index f15a08c..e91a046 100644 --- a/test/operations.json +++ b/test/operations.json @@ -1,28 +1,28 @@ [ - { - "op": "core/text-transform", - "description": "Text transform on cells in column Date using expression grel:value.toDate()", - "engineConfig": { - "facets": [], - "mode": "row-based" + { + "op": 
"core/text-transform", + "description": "Text transform on cells in column Date using expression grel:value.toDate()", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Date", + "expression": "grel:value.toDate()", + "onError": "set-to-blank", + "repeat": false, + "repeatCount": 10 }, - "columnName": "Date", - "expression": "grel:value.toDate()", - "onError": "set-to-blank", - "repeat": false, - "repeatCount": 10 - }, - { - "op": "core/text-transform", - "description": "Text transform on cells in column Date using expression grel:value.datePart(\"year\")", - "engineConfig": { - "facets": [], - "mode": "row-based" - }, - "columnName": "Date", - "expression": "grel:value.datePart(\"year\")", - "onError": "set-to-blank", - "repeat": false, - "repeatCount": 10 - } + { + "op": "core/text-transform", + "description": "Text transform on cells in column Date using expression grel:value.datePart(\"year\")", + "engineConfig": { + "facets": [], + "mode": "row-based" + }, + "columnName": "Date", + "expression": "grel:value.datePart(\"year\")", + "onError": "set-to-blank", + "repeat": false, + "repeatCount": 10 + } ] \ No newline at end of file diff --git a/test/test_refine.rb b/test/test_refine.rb new file mode 100644 index 0000000..f6b7aae --- /dev/null +++ b/test/test_refine.rb @@ -0,0 +1,35 @@ +gem 'minitest' +require 'minitest/autorun' +require_relative '../lib/refine.rb' + +class TestRefine < MiniTest::Unit::TestCase + + def setup + @refine_project = Refine.new({ "project_name" => 'date_cleanup', "file_name" => '../test/dates.txt' }) + end + + def test_refine_initializer_has_instance_variable_project_name + assert_equal 'date_cleanup', @refine_project.project_name + end + + def test_refine_initializer_has_instance_variable_project_id + assert @refine_project.project_id.match(/^[0-9]+$/) + end + + def test_get_all_project_metadata + assert Refine.get_all_project_metadata.instance_of? 
Hash + end + + def test_apply_operations + assert @refine_project.apply_operations( '../test/operations.json' ) + end + + def test_call + assert @refine_project.call( 'apply-operations', 'operations' => File.read( 'operations.json' ) ) + end + + def after_tests + @refine_project.delete_project + end + +end From 6c87ffb5aaa69d7f4b28613bfb9dea9d26a5472f Mon Sep 17 00:00:00 2001 From: m Date: Mon, 14 Mar 2016 16:09:30 +0100 Subject: [PATCH 3/3] Add examples of usage --- test/examples_of_usage.rb | 109 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 test/examples_of_usage.rb diff --git a/test/examples_of_usage.rb b/test/examples_of_usage.rb new file mode 100644 index 0000000..94cbe4b --- /dev/null +++ b/test/examples_of_usage.rb @@ -0,0 +1,109 @@ +### NOTE: The internal client-server protocol used by OpenRefine is not yet maintained as a stable external API and is subject to change. ### +### Therefore, please report any changes you notice to kittelmann@sub.uni-goettingen.de ### +### Some examples require cURL http://curl.haxx.se ### +### It is assumed that examples are run from the 'test' directory. Otherwise paths need to be adjusted. 
+load '../lib/refine.rb' + +########################## +### create initial project +########################## +prj = Refine.new({ 'project_name' => 'date_cleanup', 'file_name' => 'dates.csv' }) + + +########################## +### create another project +########################## +prj.create_project( 'date_cleanup', 'dates.txt' ) # return value = project id, example: 1484090391100 + + +################ +### do something +################ +prj.apply_operations( 'operations.json' ) # return value = status code, example: {'code'=>'ok'} + + +###################### +### extract operations +###################### +prj.get_operations # return value = operations as Hash + +###################################### +### save extracted operations to file: +###################################### +extracted_operations = prj.get_operations +File.open('../test/extracted_operations.json', 'w') do |f| + f.write extracted_operations +end + + +############### +### export data +############### +prj.export_rows # return value = exported data as tsv +prj.export_rows( {'format'=>'tsv'} ) # return value = exported data as tsv +prj.export_rows( {'format'=>'csv'} ) # return value = exported data as csv + +### export data in custom table format +prj.export_rows( { 'options'=>{'separator'=>';','lineSeparator'=>'\n'} } ) # return value = exported data as *sv with semicolon for separator + +### additional options available: +prj.export_rows( { 'options'=>{'separator'=>';','lineSeparator'=>'\n', 'outputColumnHeaders'=>true, 'outputBlankRows'=>true, 'columns'=>[{'name'=>'Date1'}] } } ) +prj.export_rows( { 'options'=>{'separator'=>';','lineSeparator'=>'\n', 'outputColumnHeaders'=>false, 'outputBlankRows'=>false } } ) + +### save extracted data to file: +exported_data = prj.export_rows( {'format'=>'csv'} ) +File.open('../test/exported_data.csv', 'w') do |f| # works + f.write exported_data +end + + +################################## +### export data using own template 
+################################## + +### construct template as url-encoded string +prefix = '%7B%0D%0A++%22rows%22+%3A+%5B%0D%0A' +suffix = '%0D%0A++%5D%0D%0A%7D' +separator = '%2C%0D%0A' +row_template = '++++%7B%0D%0A++++++%22Column+1%22+%3A+%7B%7Bjsonize%28cells%5B%22Column+1%22%5D.value%29%7D%7D%0D%0A++++%7D' + +### call (using cURL http://curl.haxx.se) +data = "engine=%7B%22facets%22%3A%5B%5D%2C%22mode%22%3A%22row-based%22%7D&project=#{prj.project_id}&format=template&sorting=%7B%22criteria%22%3A%5B%5D%7D&prefix=#{prefix}&suffix=#{suffix}&separator=#{separator}&template=#{row_template}" +system "curl --data #{'"' + data + '"'} http://127.0.0.1:3333/command/core/export-rows/" + +### save extracted data to file: +system "curl --data #{'"' + data + '"'} http://127.0.0.1:3333/command/core/export-rows/ > exported_data.json" + +### let Ruby do the URL encoding of the template +prefix = CGI.escape('{ + "rows" : [ +') +suffix = CGI.escape(' + ] +}') +separator = CGI.escape(', +') +row_template = CGI.escape(' { + "Column 1" : {{jsonize(cells["Column 1"].value)}} + }') +data = "engine=%7B%22facets%22%3A%5B%5D%2C%22mode%22%3A%22row-based%22%7D&project=#{prj.project_id}&format=template&sorting=%7B%22criteria%22%3A%5B%5D%7D&prefix=#{prefix}&suffix=#{suffix}&separator=#{separator}&template=#{row_template}" +system "curl --data #{'"' + data + '"'} http://127.0.0.1:3333/command/core/export-rows/" + + +################# +### rename column +################# +prj.rename_column( { 'oldColumnName'=>'Date', 'newColumnName'=>'Date1' } ) # return value = status Hash, e.g. 
{"code"=>"ok", "historyEntry"=>{"id"=>1438598625335, "description"=>"Rename column Date to Date1", "time"=>"2015-08-03T12:29:53Z"}} + + +############ +### metadata +############ +prj.get_project_metadata # return value = metadata as Hash +prj.get_all_project_metadata # return value = metadata for all projects as Hash + + +################## +### delete project +################## +prj.delete_project # return value = status, e.g. ok \ No newline at end of file