From cfa43671318d641dc41e303abbddbfc39d168089 Mon Sep 17 00:00:00 2001 From: Sol Eun Date: Fri, 28 Dec 2012 22:16:19 -0800 Subject: [PATCH 1/2] removing 'CC' in test --- .../hadoop/mapred/TestArcRecord.java | 98 +++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 test/java/org/commoncrawl/hadoop/mapred/TestArcRecord.java diff --git a/test/java/org/commoncrawl/hadoop/mapred/TestArcRecord.java b/test/java/org/commoncrawl/hadoop/mapred/TestArcRecord.java new file mode 100644 index 0000000..2247042 --- /dev/null +++ b/test/java/org/commoncrawl/hadoop/mapred/TestArcRecord.java @@ -0,0 +1,98 @@ +package org.commoncrawl.hadoop.mapred; + +import java.io.ByteArrayInputStream; +import java.io.InputStream; +import java.io.IOException; +import java.lang.StringBuilder; + +import junit.framework.TestCase; +import static junit.framework.Assert.*; + +/** + * Unit Tests for jUnit 3.8 + */ +public class TestArcRecord extends TestCase { + + ArcRecord r; + + /* + public static junit.framework.Test suite() { + return new junit.framework.JUnit4TestAdapter(TestArcRecord.class); + } + */ + + public InputStream getPayload1() + throws Exception { + + StringBuilder s = new StringBuilder(); + + s.setLength(0); + s.append("\n"); + s.append(" \n"); + s.append(" This is a web page!\n"); + s.append(" \n"); + s.append(" \n"); + s.append("

This is some content!

\n"); + s.append(" \n"); + s.append(""); + + String content = s.toString(); + + s.setLength(0); + s.append("HTTP/1.1 200 OK\r\n"); + s.append("Date: Fri, 31 Dec 1999 23:59:59 GMT\r\n"); + s.append("Content-Type: text/html; charset=utf-8\r\n"); + s.append("\r\n"); + s.append(content); + s.insert(0, "http://www.example.com/path/file.php?param=123,456%20789 123.123.123.123 20120235131415 text/html "+(s.length()-3)+"\n"); + + return new ByteArrayInputStream(s.toString().getBytes("UTF-8")); + } + + public void setUp() { + r = new ArcRecord(); + } + + public void test_getIpAddress() + throws Exception { + r.readFrom(this.getPayload1()); + assertEquals(r.getIpAddress(), "123.123.123.123"); + } + + public void test_getHttpHeaders() + throws Exception { + r.readFrom(this.getPayload1()); + assertEquals(r.getHttpResponse().getFirstHeader("Content-Type").getValue(), "text/html; charset=utf-8"); + } + + public void test_getHttpResponse_getEntity() + throws Exception { + + r.readFrom(this.getPayload1()); + assertNotNull(r.getHttpResponse().getEntity()); + + byte[] buffer = new byte[1000]; + r.getHttpResponse().getEntity().getContent().read(buffer, 0, 1000); + + StringBuilder s = new StringBuilder(); + s.append("\n"); + s.append(" \n"); + s.append(" This is a web page!\n"); + s.append(" \n"); + s.append(" \n"); + s.append("

This is some content!

\n"); + s.append(" \n"); + s.append(""); + + String v1 = s.toString(); + String v2 = new String(buffer, "UTF-8"); + + assertEquals(v1.trim(), v2.trim()); + } + + public void test_getParsedHTML() + throws Exception { + r.readFrom(this.getPayload1()); + assertNotNull(r.getParsedHTML()); + } +} From 3b09de3c37ee8b5e54634a8395e11a313979757d Mon Sep 17 00:00:00 2001 From: Sol Eun Date: Fri, 28 Dec 2012 22:18:46 -0800 Subject: [PATCH 2/2] removing old file --- .../hadoop/mapred/TestArcRecordCC.java | 98 ------------------- 1 file changed, 98 deletions(-) delete mode 100644 test/java/org/commoncrawl/hadoop/mapred/TestArcRecordCC.java diff --git a/test/java/org/commoncrawl/hadoop/mapred/TestArcRecordCC.java b/test/java/org/commoncrawl/hadoop/mapred/TestArcRecordCC.java deleted file mode 100644 index dbff02a..0000000 --- a/test/java/org/commoncrawl/hadoop/mapred/TestArcRecordCC.java +++ /dev/null @@ -1,98 +0,0 @@ -package org.commoncrawl.hadoop.mapred; - -import java.io.ByteArrayInputStream; -import java.io.InputStream; -import java.io.IOException; -import java.lang.StringBuilder; - -import junit.framework.TestCase; -import static junit.framework.Assert.*; - -/** - * Unit Tests for jUnit 3.8 - */ -public class TestArcRecordCC extends TestCase { - - ArcRecordCC r; - - /* - public static junit.framework.Test suite() { - return new junit.framework.JUnit4TestAdapter(TestArcRecordCC.class); - } - */ - - public InputStream getPayload1() - throws Exception { - - StringBuilder s = new StringBuilder(); - - s.setLength(0); - s.append("\n"); - s.append(" \n"); - s.append(" This is a web page!\n"); - s.append(" \n"); - s.append(" \n"); - s.append("

This is some content!

\n"); - s.append(" \n"); - s.append(""); - - String content = s.toString(); - - s.setLength(0); - s.append("HTTP/1.1 200 OK\r\n"); - s.append("Date: Fri, 31 Dec 1999 23:59:59 GMT\r\n"); - s.append("Content-Type: text/html; charset=utf-8\r\n"); - s.append("\r\n"); - s.append(content); - s.insert(0, "http://www.example.com/path/file.php?param=123,456%20789 123.123.123.123 20120235131415 text/html "+(s.length()-3)+"\n"); - - return new ByteArrayInputStream(s.toString().getBytes("UTF-8")); - } - - public void setUp() { - r = new ArcRecordCC(); - } - - public void test_getIpAddress() - throws Exception { - r.readFrom(this.getPayload1()); - assertEquals(r.getIpAddress(), "123.123.123.123"); - } - - public void test_getHttpHeaders() - throws Exception { - r.readFrom(this.getPayload1()); - assertEquals(r.getHttpResponse().getFirstHeader("Content-Type").getValue(), "text/html; charset=utf-8"); - } - - public void test_getHttpResponse_getEntity() - throws Exception { - - r.readFrom(this.getPayload1()); - assertNotNull(r.getHttpResponse().getEntity()); - - byte[] buffer = new byte[1000]; - r.getHttpResponse().getEntity().getContent().read(buffer, 0, 1000); - - StringBuilder s = new StringBuilder(); - s.append("\n"); - s.append(" \n"); - s.append(" This is a web page!\n"); - s.append(" \n"); - s.append(" \n"); - s.append("

This is some content!

\n"); - s.append(" \n"); - s.append(""); - - String v1 = s.toString(); - String v2 = new String(buffer, "UTF-8"); - - assertEquals(v1.trim(), v2.trim()); - } - - public void test_getParsedHTML() - throws Exception { - r.readFrom(this.getPayload1()); - assertNotNull(r.getParsedHTML()); - } -}