Skip to content

Commit c98a68d

Browse files
authored
AISDK-193: Language id should be supported in python sdk (#86)
1 parent b26c40d commit c98a68d

File tree

8 files changed

+532
-1
lines changed

8 files changed

+532
-1
lines changed
Lines changed: 79 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,79 @@
1+
"""Copyright 2022 REV
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License.
14+
"""
15+
16+
import time
17+
from rev_ai import language_identification_client
18+
19+
20+
# String containing your access token
access_token = "<your_access_token>"

# Create your api client
client = language_identification_client.LanguageIdentificationClient(access_token)

# Submitting a job with a link to the file you want to identify the language of
# Change url to your url
url = "https://www.rev.ai/FTC_Sample_1.mp3"
job = client.submit_job_url(url,
                            metadata=None,
                            callback_url=None,
                            delete_after_seconds=None)

print("Submitted Job")

# Poll the job until it reaches a terminal status ("FAILED" or "COMPLETED").
# Please note that polling is not the recommended way of getting job status
# in a real application. For recommended methods of getting job status
# please see our documentation on callback_urls here:
# https://docs.rev.ai/resources/tutorials/get-started-api-webhooks/
while True:
    # Obtains the current details of the job
    job_details = client.get_job_details(job.id)
    status = job_details.status.name

    print("Job Status : {}".format(status))

    if status == "FAILED":
        print("Job Failed : {}".format(job_details.failure_detail))
        break

    if status == "COMPLETED":
        # Getting a list of current language identification jobs connected with your account
        # The optional parameters limits the length of the list.
        # starting_after is a job id which causes the removal of
        # all jobs from the list which were created before that job
        list_of_jobs = client.get_list_of_jobs(limit=None, starting_after=None)

        # obtain the most probable language spoken and a list of other
        # possible languages and their confidence scores
        result = client.get_result_object(job.id)
        print("Top Language : {}".format(result.top_language))
        print("Language Confidences : {}".format([{
            'language': language_confidence.language,
            'confidence': language_confidence.confidence
        } for language_confidence in result.language_confidences]))

        break

    # Every non-terminal status (normally "IN_PROGRESS") waits before the next
    # poll, so an unexpected status value can never turn this loop into a
    # tight loop of back-to-back API requests.
    time.sleep(2)

# Use the objects however you please
# Once you are done with the job, you can delete it.
# NOTE : This will PERMANENTLY DELETE all data related to a job. Exercise only
# if you're sure you want to delete the job.
#
# client.delete_job(job.id)

print("Job Submission and Collection Finished.")

src/rev_ai/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@
55

66
from .models import Job, JobStatus, Account, Transcript, Monologue, Element, MediaConfig, \
77
CaptionType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
8-
SpeakerName
8+
SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
Lines changed: 110 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,110 @@
1+
# -*- coding: utf-8 -*-
2+
"""Client used for interacting with our language identification api"""
3+
4+
import json
5+
from .generic_api_client import GenericApiClient
6+
from .models import LanguageIdentificationJob, LanguageIdentificationResult
7+
8+
try:
9+
from urllib.parse import urljoin
10+
except ImportError:
11+
from urlparse import urljoin
12+
13+
14+
class LanguageIdentificationClient(GenericApiClient):
    """Client for interacting with the Rev AI language identification api"""

    # Default version of Rev AI language identification api
    api_version = 'v1'

    # Default api name of Rev AI language identification api
    api_name = 'languageid'

    def __init__(self, access_token):
        """Constructor

        Initializes the generic client with this api's name and version and
        with the language identification job/result parsers.

        :param access_token: access token which authorizes all requests and links them to your
                             account. Generated on the settings page of your account dashboard
                             on Rev AI.
        """
        GenericApiClient.__init__(
            self,
            access_token,
            self.api_name,
            self.api_version,
            LanguageIdentificationJob.from_json,
            LanguageIdentificationResult.from_json)

    def submit_job_url(
            self,
            media_url,
            metadata=None,
            callback_url=None,
            delete_after_seconds=None):
        """Submit media as a URL for language identification.
        The audio data is downloaded from the URL.

        :param media_url: web location of the media file
        :param metadata: info to associate with the language identification job
        :param callback_url: callback url to invoke on job completion as a webhook
        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
        :returns: whatever the inherited ``_submit_job`` helper returns for the payload
        :raises: ValueError if media_url is empty, HTTPError
        """
        if not media_url:
            raise ValueError('media_url must be provided')

        job_options = {'media_url': media_url}
        return self._submit_job(
            self._enhance_payload(job_options, metadata, callback_url, delete_after_seconds))

    def submit_job_local_file(
            self,
            filename,
            metadata=None,
            callback_url=None,
            delete_after_seconds=None):
        """Submit a local file for language identification.

        The file is opened in binary mode and posted to the 'jobs' endpoint as the
        'media' part of a multipart request, next to an 'options' part holding the
        JSON-encoded job options.

        :param filename: path to a local file on disk
        :param metadata: info to associate with the language identification job
        :param callback_url: callback url to invoke on job completion as a webhook
        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
        :returns: LanguageIdentificationJob parsed from the response json
        :raises: ValueError if filename is empty, HTTPError
        """
        if not filename:
            raise ValueError('filename must be provided')

        job_options = self._enhance_payload({}, metadata, callback_url, delete_after_seconds)

        # The request must be sent while the file handle is still open.
        with open(filename, 'rb') as media_file:
            multipart = {
                'media': (filename, media_file),
                'options': (None, json.dumps(job_options, sort_keys=True))
            }
            jobs_endpoint = urljoin(self.base_url, 'jobs')
            response = self._make_http_request("POST", jobs_endpoint, files=multipart)

        return LanguageIdentificationJob.from_json(response.json())

    def get_result_json(self, id_):
        """Get result of a language identification job as json.

        :param id_: id of job to be requested
        :returns: job result data as raw json
        :raises: HTTPError
        """
        no_params = {}
        return self._get_result_json(id_, no_params)

    def get_result_object(self, id_):
        """Get result of a language identification job as LanguageIdentificationResult object.

        :param id_: id of job to be requested
        :returns: job result data as LanguageIdentificationResult object
        :raises: HTTPError
        """
        no_params = {}
        return self._get_result_object(id_, no_params)

src/rev_ai/models/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,3 +6,4 @@
66
from .asynchronous import Job, JobStatus, Account, Transcript, Monologue, Element, CaptionType, \
77
SpeakerName
88
from .insights import TopicExtractionJob, TopicExtractionResult, Topic, Informant
9+
from .language_id import LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# -*- coding: utf-8 -*-
2+
"""Language Identification Models"""
3+
4+
from .language_id_job import LanguageIdentificationJob
5+
from .language_id_result import LanguageIdentificationResult, LanguageConfidence
Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
# -*- coding: utf-8 -*-
2+
"""Job model"""
3+
4+
from ..asynchronous.job_status import JobStatus
5+
6+
7+
class LanguageIdentificationJob:
    """Model describing a language identification job and its current state."""

    def __init__(
            self, id_, created_on, status,
            completed_on=None,
            callback_url=None,
            metadata=None,
            media_url=None,
            failure=None,
            failure_detail=None,
            processed_duration_seconds=None,
            delete_after_seconds=None):
        """
        :param id_: unique id of job
        :param created_on: date and time at which this job was started
        :param status: current job status 'IN_PROGRESS', 'COMPLETED', or 'FAILED'
        :param completed_on: date and time at which this job finished being processed
        :param callback_url: callback_url if provided
        :param metadata: metadata if provided
        :param media_url: url of the submitted media if job was submitted this way
        :param failure: type of failure if job has failed
        :param failure_detail: more detailed failure message if job has failed
        :param processed_duration_seconds: duration of file processed in seconds
        :param delete_after_seconds: seconds before deletion if provided
        """

        self.id = id_
        self.created_on = created_on
        self.status = status
        self.completed_on = completed_on
        self.callback_url = callback_url
        self.metadata = metadata
        self.media_url = media_url
        self.failure = failure
        self.failure_detail = failure_detail
        self.processed_duration_seconds = processed_duration_seconds
        self.delete_after_seconds = delete_after_seconds

    def __eq__(self, other):
        """Override default equality operator

        Two jobs are equal when they are instances of the same class and all
        of their attributes are equal.
        """
        if isinstance(other, self.__class__):
            return self.__dict__ == other.__dict__
        return False

    def __ne__(self, other):
        """Keep ``!=`` consistent with ``==``

        Python 2 does not derive ``__ne__`` from ``__eq__``, so it is defined
        explicitly.
        """
        return not self.__eq__(other)

    @classmethod
    def from_json(cls, json):
        """Alternate constructor used for parsing json

        :param json: mapping with required keys 'id', 'created_on' and 'status';
                     every other field is optional and defaults to None
        :returns: LanguageIdentificationJob whose status is parsed with
                  JobStatus.from_string
        :raises: KeyError if a required key is missing
        """
        return cls(
            json['id'],
            json['created_on'],
            JobStatus.from_string(json['status']),
            completed_on=json.get('completed_on'),
            callback_url=json.get('callback_url'),
            metadata=json.get('metadata'),
            media_url=json.get('media_url'),
            failure=json.get('failure'),
            failure_detail=json.get('failure_detail'),
            processed_duration_seconds=json.get('processed_duration_seconds'),
            delete_after_seconds=json.get('delete_after_seconds'),
        )
Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,52 @@
1+
# -*- coding: utf-8 -*-
2+
"""Language identification result model"""
3+
4+
5+
class LanguageIdentificationResult:
    """Model holding the outcome of a language identification job."""

    def __init__(self, top_language, language_confidences):
        """
        :param top_language: Language code of predicted language
        :param language_confidences: List of all potential languages with their corresponding
                                     confidence scores
        """
        self.top_language = top_language
        self.language_confidences = language_confidences

    def __eq__(self, other):
        """Override default equality operator

        Results are equal when they have the same top language and the same
        confidences in the same order. The lengths are compared explicitly
        because ``zip`` stops at the shorter sequence and would otherwise treat
        a result as equal to one that merely shares its leading confidences.
        """
        if not isinstance(other, self.__class__):
            return False
        if self.top_language != other.top_language:
            return False

        mine = list(self.language_confidences)
        theirs = list(other.language_confidences)
        return len(mine) == len(theirs) and all(a == b for a, b in zip(mine, theirs))

    def __ne__(self, other):
        """Keep ``!=`` consistent with ``==``

        Python 2 does not derive ``__ne__`` from ``__eq__``, so it is defined
        explicitly.
        """
        return not self.__eq__(other)

    @classmethod
    def from_json(cls, json):
        """Alternate constructor used for parsing json

        :param json: mapping with required key 'top_language' and optional key
                     'language_confidences' (treated as empty when absent)
        :returns: LanguageIdentificationResult whose confidences are parsed with
                  LanguageConfidence.from_json
        :raises: KeyError if 'top_language' is missing
        """
        return cls(
            json['top_language'],
            [LanguageConfidence.from_json(language_confidence) for
                language_confidence in json.get('language_confidences', [])])
30+
31+
32+
class LanguageConfidence:
    """Model pairing a candidate language with its confidence score."""

    def __init__(self, language, confidence):
        """
        :param language: Language code of predicted language
        :param confidence: Confidence score of the predicted language, ranges from 0.00 to 1.00
        """
        self.language = language
        self.confidence = confidence

    def __eq__(self, other):
        """Override default equality operator

        Two instances are equal when both language and confidence match.
        """
        if isinstance(other, self.__class__):
            return self.language == other.language and self.confidence == other.confidence
        return False

    def __ne__(self, other):
        """Keep ``!=`` consistent with ``==``

        Python 2 does not derive ``__ne__`` from ``__eq__``, so it is defined
        explicitly.
        """
        return not self.__eq__(other)

    @classmethod
    def from_json(cls, json):
        """Alternate constructor used for parsing json

        :param json: mapping with required keys 'language' and 'confidence'
        :returns: LanguageConfidence built from those two values
        :raises: KeyError if either key is missing
        """
        return cls(
            json['language'],
            json['confidence'])

0 commit comments

Comments
 (0)