Skip to content

Commit 5d70d70

Browse files
authored
V1.0.1 (#2)
* Stable Version - 1.0.1 * Version Update
1 parent c3f8c80 commit 5d70d70

File tree

15 files changed

+818
-0
lines changed

15 files changed

+818
-0
lines changed

.gitignore

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
/.idea/
2+
/dist/
3+
/build/
4+
/*.egg-info/

ExampleCode.ipynb

Lines changed: 315 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,315 @@
1+
{
2+
"nbformat": 4,
3+
"nbformat_minor": 0,
4+
"metadata": {
5+
"colab": {
6+
"name": "ExtractTable - Advanced Code Usage.ipynb",
7+
"provenance": [],
8+
"collapsed_sections": []
9+
},
10+
"kernelspec": {
11+
"name": "python3",
12+
"display_name": "Python 3"
13+
},
14+
"pycharm": {
15+
"stem_cell": {
16+
"cell_type": "raw",
17+
"source": [],
18+
"metadata": {
19+
"collapsed": false
20+
}
21+
}
22+
}
23+
},
24+
"cells": [
25+
{
26+
"cell_type": "code",
27+
"metadata": {
28+
"id": "NhVhMrQ0ZdQr",
29+
"colab_type": "code",
30+
"colab": {}
31+
},
32+
"source": [
33+
"# !pip install -U ExtractTable"
34+
],
35+
"execution_count": 0,
36+
"outputs": []
37+
},
38+
{
39+
"cell_type": "code",
40+
"metadata": {
41+
"id": "2aIaghfeZnQr",
42+
"colab_type": "code",
43+
"colab": {}
44+
},
45+
"source": [
46+
"from ExtractTable import *"
47+
],
48+
"execution_count": 0,
49+
"outputs": []
50+
},
51+
{
52+
"cell_type": "code",
53+
"metadata": {
54+
"id": "LJL_ZyYzZsFY",
55+
"colab_type": "code",
56+
"colab": {}
57+
},
58+
"source": [
59+
"api_key = YOUR_APIKEY_HERE"
60+
],
61+
"execution_count": 0,
62+
"outputs": []
63+
},
64+
{
65+
"cell_type": "markdown",
66+
"metadata": {
67+
"id": "bwtpzTJxZHRi",
68+
"colab_type": "text"
69+
},
70+
"source": [
71+
"**Create Session** with your API Key"
72+
]
73+
},
74+
{
75+
"cell_type": "code",
76+
"metadata": {
77+
"id": "Bfw5GTNvZGv8",
78+
"colab_type": "code",
79+
"colab": {}
80+
},
81+
"source": [
82+
"et_sess = ExtractTable(api_key)"
83+
],
84+
"execution_count": 0,
85+
"outputs": []
86+
},
87+
{
88+
"cell_type": "markdown",
89+
"metadata": {
90+
"id": "On4_X8v3Zk3v",
91+
"colab_type": "text"
92+
},
93+
"source": [
94+
"**Validate** the Key and check the plan usage"
95+
]
96+
},
97+
{
98+
"cell_type": "code",
99+
"metadata": {
100+
"id": "a7EPvvvMZ0Ub",
101+
"colab_type": "code",
102+
"colab": {}
103+
},
104+
"source": [
105+
"usage = et_sess.check_usage()"
106+
],
107+
"execution_count": 0,
108+
"outputs": []
109+
},
110+
{
111+
"cell_type": "markdown",
112+
"metadata": {
113+
"id": "sovuclERjRqy",
114+
"colab_type": "text"
115+
},
116+
"source": [
117+
"*If no error is encountered in the cell above, the API key is valid. Next, check the usage and trigger the file for processing.*"
118+
]
119+
},
120+
{
121+
"cell_type": "code",
122+
"metadata": {
123+
"id": "HT97IP8MZ9WF",
124+
"colab_type": "code",
125+
"colab": {
126+
"base_uri": "https://localhost:8080/",
127+
"height": 34
128+
},
129+
"outputId": "65f9eb37-cdec-425f-905f-c38c7d778892"
130+
},
131+
"source": [
132+
"print(usage)"
133+
],
134+
"execution_count": 6,
135+
"outputs": [
136+
{
137+
"output_type": "stream",
138+
"text": [
139+
"{'credits': 1000, 'queued': 2, 'used': 533}\n"
140+
],
141+
"name": "stdout"
142+
}
143+
]
144+
},
145+
{
146+
"cell_type": "markdown",
147+
"metadata": {
148+
"id": "-XqbBoB-i3pi",
149+
"colab_type": "text"
150+
},
151+
"source": [
152+
"**credits**: Total number of credits attached to the API Key\n",
153+
"\n",
154+
"**queued** : Number of triggered jobs that were left \"IN_PROGRESS\", not yet retrieved\n",
155+
"\n",
156+
"**used** : Number of credits already used "
157+
]
158+
},
159+
{
160+
"cell_type": "markdown",
161+
"metadata": {
162+
"id": "oUnBFxYiZ1Ka",
163+
"colab_type": "text"
164+
},
165+
"source": [
166+
"**Trigger** the process to extract tabular data from the file"
167+
]
168+
},
169+
{
170+
"cell_type": "code",
171+
"metadata": {
172+
"id": "P_xzVgHmZ9sw",
173+
"colab_type": "code",
174+
"colab": {}
175+
},
176+
"source": [
177+
"filepath = r'testimages/chervolet.jpg'\n",
178+
"table_data = et_sess.process_file(filepath)"
179+
],
180+
"execution_count": 0,
181+
"outputs": []
182+
},
183+
{
184+
"cell_type": "code",
185+
"metadata": {
186+
"id": "XrXBfENfZ2AI",
187+
"colab_type": "code",
188+
"colab": {
189+
"base_uri": "https://localhost:8080/",
190+
"height": 255
191+
},
192+
"outputId": "5b97dec4-a399-44d9-c087-f86430b295e5"
193+
},
194+
"source": [
195+
"table_data # Notice the default output is a list of pandas dataframes"
196+
],
197+
"execution_count": 13,
198+
"outputs": [
199+
{
200+
"output_type": "execute_result",
201+
"data": {
202+
"text/plain": [
203+
"[ 0 1 2 3 4\n",
204+
" 0 vendor Model Price Mileage VIN Code\n",
205+
" 1 Chevrolet Corvette 17226 25965.0 ILLAKAWAZDZ\n",
206+
" 2 Chevrolet Corvette 34229 46429.0 RCPNSRYGXON\n",
207+
" 3 Chevrolet Corvette 27982 50209.0 NWLGCEVEHGI\n",
208+
" 4 Chevrolet Corvette 51825 72998.0 NGVZSCIZGSM\n",
209+
" 5 Chevrolet Corvette 52845 34364.0 PSDRUYYOIJG\n",
210+
" 6 Chevrolet Malibu 37874 37273.0 VLFPQPWNEFD\n",
211+
" 7 Chevrolet Malibu 15600 71441.0 EXLJGDWOZS\n",
212+
" 8 Chevrolet Malibu 52447 46700.0 NLMGJZAKBRD\n",
213+
" 9 Chevrolet Malibu 27129 36254.0 OIPFUIENLEHS)\n",
214+
" 10 Chevrolet Malibu 28846 77162.0 WRCOOFREZLL\n",
215+
" 11 Chevrolet Malibu 46165 60590.0 HUFTTHQHSFJF\n",
216+
" 12 Chevrolet Malibu 18263 377900 MHNAFSHVD]"
217+
]
218+
},
219+
"metadata": {
220+
"tags": []
221+
},
222+
"execution_count": 13
223+
}
224+
]
225+
},
226+
{
227+
"cell_type": "markdown",
228+
"metadata": {
229+
"id": "TYLfNBQ6bL64",
230+
"colab_type": "text"
231+
},
232+
"source": [
233+
"**What else** is in store.\n",
234+
"- check the latest Actual ServerResponse attached to the session with `et_sess.ServerResponse.json()`\n",
235+
"- check out the list of available output formats with `ExtractTable._OUTPUT_FORMATS`"
236+
]
237+
},
238+
{
239+
"cell_type": "markdown",
240+
"metadata": {
241+
"id": "znSqvSJbd2hv",
242+
"colab_type": "text"
243+
},
244+
"source": [
245+
"Check the **latest ServerResponse** in the process"
246+
]
247+
},
248+
{
249+
"cell_type": "code",
250+
"metadata": {
251+
"id": "cChDS1g3ds0U",
252+
"colab_type": "code",
253+
"colab": {}
254+
},
255+
"source": [
256+
"et_sess.ServerResponse.json()"
257+
],
258+
"execution_count": 0,
259+
"outputs": []
260+
},
261+
{
262+
"cell_type": "markdown",
263+
"metadata": {
264+
"id": "Fwu-Cn7vbEMo",
265+
"colab_type": "text"
266+
},
267+
"source": [
268+
"Check out the list of all **available output formats**"
269+
]
270+
},
271+
{
272+
"cell_type": "code",
273+
"metadata": {
274+
"id": "lMlTzUsIbpx4",
275+
"colab_type": "code",
276+
"colab": {
277+
"base_uri": "https://localhost:8080/",
278+
"height": 34
279+
},
280+
"outputId": "68ed73b1-69a4-4f16-e909-d69409e508bd"
281+
},
282+
"source": [
283+
"ExtractTable._OUTPUT_FORMATS"
284+
],
285+
"execution_count": 16,
286+
"outputs": [
287+
{
288+
"output_type": "execute_result",
289+
"data": {
290+
"text/plain": [
291+
"{'csv', 'dataframe', 'df', 'dict', 'json'}"
292+
]
293+
},
294+
"metadata": {
295+
"tags": []
296+
},
297+
"execution_count": 16
298+
}
299+
]
300+
},
301+
{
302+
"cell_type": "code",
303+
"metadata": {
304+
"id": "glsrlvpydZaz",
305+
"colab_type": "code",
306+
"colab": {}
307+
},
308+
"source": [
309+
""
310+
],
311+
"execution_count": 0,
312+
"outputs": []
313+
}
314+
]
315+
}
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
"""
2+
To validate the files at Client End; Fail Faster
3+
"""
4+
import typing as ty
5+
import os
6+
from ..exceptions import ClientFileError
7+
8+
9+
class CheckFile:
    """Validate a file on the client side so that bad input fails fast.

    Instantiating the class runs both checks immediately: first the file
    extension, then the file size. Either check raises ``ClientFileError``
    when the file is rejected.

    :param filepath: location of the file to validate, as ``str`` or ``os.PathLike``
    :raises ClientFileError: on an unsupported extension or an oversized file
    """

    # An ordered tuple literal (rather than tuple(set)) keeps the order stable,
    # so the "Allowed file types" error message is the same on every run.
    __SUPPORTED_EXTENSIONS__: tuple = ('.pdf', '.jpeg', '.jpg', '.png')
    __THRESHOLD_SIZE__: int = 4  # Megabytes

    def __init__(self, filepath: ty.Union[os.PathLike, str]):
        self.filepath = filepath
        self.type_error()
        self.size_error()

    def type_error(self) -> None:
        """Check the file extension (case-insensitively).

        :raises ClientFileError: if the path does not end with a supported extension
        """
        # os.fspath() converts os.PathLike objects to their string form;
        # path objects themselves have no .lower() method.
        if os.fspath(self.filepath).lower().endswith(self.__SUPPORTED_EXTENSIONS__):
            return
        raise ClientFileError(Message=f"Allowed file types are {self.__SUPPORTED_EXTENSIONS__}")

    def size_error(self) -> None:
        """Check the on-disk file size against the threshold.

        :raises ClientFileError: if the file is larger than the allowed size
        """
        # 1027 (instead of 1024) to create some buffer above the nominal threshold
        if os.stat(self.filepath).st_size <= self.__THRESHOLD_SIZE__*1027*1027:
            return
        raise ClientFileError(Message=f"File Size greater than the threshold {self.__THRESHOLD_SIZE__} Mb.")

ExtractTable/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
from .client import ExtractTable

ExtractTable/__version__.py

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
# Version components; generate_version() joins the VERSION items with "." to form the base string.
VERSION = (1, 0, 1)
2+
PRERELEASE = None # "alpha", "beta" or "rc"; appended as "-<prerelease>" when not None
3+
REVISION = None  # appended as ".<revision>" when not None
4+
5+
6+
def generate_version(version, prerelease=None, revision=None):
    """Assemble a version string from its components.

    :param version: iterable of version components; each is stringified and joined with "."
    :param prerelease: optional tag, appended as "-<prerelease>" when not None
    :param revision: optional value, appended as ".<revision>" when not None
    :return: the assembled version string
    """
    rendered = ".".join(str(component) for component in version)
    if prerelease is not None:
        rendered = f"{rendered}-{prerelease}"
    if revision is not None:
        rendered = f"{rendered}.{revision}"
    return rendered
13+
14+
15+
# Package metadata.
__title__ = "ExtractTable"
16+
__description__ = "Extract tabular data from images and scanned PDFs"
17+
__url__ = "https://github.com/ExtractTable/ExtractTable-py"
18+
__version__ = generate_version(VERSION, prerelease=PRERELEASE, revision=REVISION)  # built from the module-level version components
19+
__author__ = "Saradhi"
20+
__author_email__ = "saradhi@extracttable.com"
21+
__license__ = "Apache License 2.0"

0 commit comments

Comments
 (0)