Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,24 @@ Our `parsed` object should look like -

The `parsed` object includes four objects - `microdata`, `rdfa`, `jsonld` and `metatags`. Since the above HTML does not have any information encoded in `rdfa` and `jsonld`, those two objects are empty.

#### [Errors](#errors)

There is no guarantee against malformed content when working with live pages on
the Internet. This is especially true when webmasters attempt to work with
JSON-LD. You can expect to parse a page with malformed content at some point.

In the case of JSON-LD, the parser collects any parse errors it encounters in an
`errors` array, which is only present if at least one error occurred. Note that
these are full Node.js error objects, so be prepared to extract the data you need.

```js
const parsed = WAE().parse(whatWereTheyThinkingHTML)
const parseErrors = (parsed.jsonld.errors || []).map(err => err.message)
```

Clients can check for the presence of the `errors` property and respond
accordingly.

## Caveat

This is less a caveat than a design decision: the parser is strict by design. If the HTML isn't encoded correctly, it might not parse as expected, which can make it look as though the parser is broken.
Expand Down
2 changes: 2 additions & 0 deletions src/parsers/jsonld-parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ export default function (html, config = {}) {
jsonldData[type].push(obj)
})
} catch (e) {
if (!('errors' in jsonldData)) { jsonldData.errors = [] }
jsonldData.errors.push(e)
console.log(`Error in jsonld parse - ${e}`)
}
})
Expand Down
6 changes: 6 additions & 0 deletions test/resources/expectedErrors.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
{
"jsonld": [
"Unexpected end of JSON input",
"Unexpected token ' in JSON at position 11"
]
}
140 changes: 140 additions & 0 deletions test/resources/testErrorPage.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,140 @@
<!-- Example of incomplete JSON object -->
<script type="application/ld+json">
{
"@context": "http://schema.org/",
"@type": "Product",
"name": "Executive Anvil",
"image": "http://www.example.com/anvil_executive.jpg",
"description": "Sleeker than ACME's Classic Anvil, the Executive Anvil is perfect for the business traveler looking for something to drop from a height.",
"mpn": "925872",
"brand": {
"@type": "Thing",
"name": "ACME"
},
"aggregateRating": {
"@type": "AggregateRating",
"ratingValue": "4.4",
"reviewCount": "89"
},

</script>
<!-- Example of JSON with single instead of double-quotes -->
<script type="application/ld+json">
[
{
'@context':'http://schema.org',
'@type':'TheaterEvent',
'name':'Random Theater Show #1',
'startDate':'2016-12-15T19:30:00-06:00',
'location':{
'@type':'Place',
'name':'Theatre'
}
},
{
'@context':'http://schema.org',
'@type':'TheaterEvent',
'name':'Random Theater Show #2',
'startDate':'2016-12-16T19:30:00-06:00',
'location':{
'@type':'Place',
'name':'Theatre'
}
}
]
</script>
<div itemscope itemtype="http://schema.org/Product">
<span itemprop="brand">ACME</span>
<span itemprop="name">Executive Anvil</span>
<img itemprop="image" src="anvil_executive.jpg" alt="Executive Anvil logo" />
<span itemprop="description">Sleeker than ACME's Classic Anvil, the
Executive Anvil is perfect for the business traveler
looking for something to drop from a height.
</span>
Product #: <span itemprop="mpn">925872</span>
<span itemprop="aggregateRating" itemscope itemtype="http://schema.org/AggregateRating">
<span itemprop="ratingValue">4.4</span> stars, based on <span itemprop="reviewCount">89
</span> reviews
</span>

<span itemprop="offers" itemscope itemtype="http://schema.org/Offer">
Regular price: $179.99
<meta itemprop="priceCurrency" content="USD" />
$<span itemprop="price">119.99</span>
(Sale ends <time itemprop="priceValidUntil" datetime="2020-11-05">
5 November!</time>)
Available from: <span itemprop="seller" itemscope itemtype="http://schema.org/Organization">
<span itemprop="name">Executive Objects</span>
</span>
Condition: <link itemprop="itemCondition" href="http://schema.org/UsedCondition"/>Previously owned,
in excellent condition
<link itemprop="availability" href="http://schema.org/InStock"/>In stock! Order now!</span>
</span>
</div>
<!-- example from http://schema.org/Recipe -->
<div itemscope itemtype="http://schema.org/Recipe">
<span itemprop="name">Mom's World Famous Banana Bread</span>
By <span itemprop="author">John Smith</span>,
<meta itemprop="datePublished" content="2009-05-08">May 8, 2009
<meta http-equiv="refresh" content="30">
<img itemprop="image" src="bananabread.jpg"
alt="Banana bread on a plate" />
<span itemprop="description">This classic banana bread recipe comes
from my mom -- the walnuts add a nice texture and flavor to the banana
bread.</span>
Prep Time: <meta itemprop="prepTime" content="PT15M">15 minutes
Cook time: <meta itemprop="cookTime" content="PT1H">1 hour
Yield: <span itemprop="recipeYield">1 loaf</span>
Tags: <link itemprop="suitableForDiet" href="http://schema.org/LowFatDiet" />Low fat
<div itemprop="nutrition"
itemscope itemtype="http://schema.org/NutritionInformation">
Nutrition facts:
<span itemprop="calories">240 calories</span>,
<span itemprop="fatContent">9 grams fat</span>
</div>
Ingredients:
- <span itemprop="recipeIngredient">3 or 4 ripe bananas, smashed</span>
- <span itemprop="recipeIngredient">1 egg</span>
- <span itemprop="recipeIngredient">3/4 cup of sugar</span>
...
Instructions:
<span itemprop="recipeInstructions">
Preheat the oven to 350 degrees. Mix in the ingredients in a bowl. Add
the flour last. Pour the mixture into a loaf pan and bake for one hour.
</span>
140 comments:
<div itemprop="interactionStatistic" itemscope itemtype="http://schema.org/InteractionCounter">
<meta itemprop="interactionType" content="http://schema.org/CommentAction" />
<meta itemprop="userInteractionCount" content="140" />
</div>
From Janel, May 5 -- thank you, great recipe!
...
</div>
<div vocab="http://schema.org/" typeof="Product">
<span property="brand">ACME</span>
<span property="name">Executive Anvil</span>
<img propertyu="image" src="anvil_executive.jpg" alt="Executive Anvil logo" />
<span property="description">Sleeker than ACME's Classic Anvil, the
Executive Anvil is perfect for the business traveler
looking for something to drop from a height.
</span>
Product #: <span property="mpn">925872</span>
<span property="aggregateRating" typeof="AggregateRating">
<span property="ratingValue">4.4</span> stars, based on <span property="reviewCount">89
</span> reviews
</span>

<span property="offers" typeof="Offer">
Regular price: $179.99
<meta property="priceCurrency" content="USD" />
$<span property="price">119.99</span>
(Sale ends <time property="priceValidUntil" datetime="2020-11-05">
5 November!</time>)
Available from: <span property="seller" typeof="Organization">
<span property="name">Executive Objects</span>
</span>
Condition: <link property="itemCondition" href="http://schema.org/UsedCondition"/>Previously owned,
in excellent condition
<link property="availability" content="http://schema.org/InStock"/>In stock! Order now!</span>
</span>
</div>
13 changes: 13 additions & 0 deletions test/test.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ import WAE from '../src'
const fileReader = (fileName) => fs.readFileSync(fileName, { encoding: 'utf-8' })
const expectedResult = JSON.parse(fileReader('test/resources/expectedResult.json'))
const testPage = fileReader('test/resources/testPage.html')
const testErrorPage = fileReader('test/resources/testErrorPage.html')
const expectedErrors = JSON.parse(fileReader('test/resources/expectedErrors.json'))
const { microdata, rdfa, metatags, jsonld } = WAE().parse(testPage)

describe('Web Auto Extractor', function () {
Expand All @@ -25,4 +27,15 @@ describe('Web Auto Extractor', function () {
it('should find embedded meta tags', function () {
assert.deepEqual(metatags, expectedResult.metatags)
})

// Error-path suite: runs the extractor over the error fixture page and checks
// the JSON-LD parse errors it collected.
describe('when there are parse errors', function () {
  const { jsonld: errorPageJsonld } = WAE().parse(testErrorPage)

  it('should save jsonld parse errors', function () {
    // Only the number of collected errors is compared against the fixture,
    // not the message text of each error.
    const actualCount = errorPageJsonld.errors.length
    const expectedCount = expectedErrors.jsonld.length

    assert.equal(actualCount, expectedCount)
  })
})
})