AISDK-193: Language id should be supported in python sdk (#86)

seanlam8 · web-flow · commit c98a68dab610 · 2022-04-29T14:42:15.000-07:00
diff --git a/examples/language_identification_example.py b/examples/language_identification_example.py
@@ -0,0 +1,79 @@
+"""Copyright 2022 REV
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+"""
+
+import time
+from rev_ai import language_identification_client
+
+
+# String containing your access token
+access_token = "<your_access_token>"
+
+# Create your api client
+client = language_identification_client.LanguageIdentificationClient(access_token)
+
+# Submitting a job with a link to the file you want to identify the language of
+# Change url to your url
+url = "https://www.rev.ai/FTC_Sample_1.mp3"
+job = client.submit_job_url(url,
+                            metadata=None,
+                            callback_url=None,
+                            delete_after_seconds=None)
+
+print("Submitted Job")
+
+while True:
+    # Obtains the current details of the job; its status is read from them below
+    job_details = client.get_job_details(job.id)
+    status = job_details.status.name
+
+    print("Job Status : {}".format(status))
+
+    # Checks if the job has been completed. Please note that this is not the recommended way
+    # of getting job status in a real application. For recommended methods of getting job status
+    # please see our documentation on callback_urls here:
+    # https://docs.rev.ai/resources/tutorials/get-started-api-webhooks/
+    if status == "IN_PROGRESS":
+        time.sleep(2)
+        continue
+
+    elif status == "FAILED":
+        print("Job Failed : {}".format(job_details.failure_detail))
+        break
+
+    if status == "COMPLETED":
+        # Getting a list of current language identification jobs connected with your account
+        # The optional limit parameter limits the length of the list.
+        # starting_after is a job id which causes the removal of
+        # all jobs from the list which were created before that job
+        list_of_jobs = client.get_list_of_jobs(limit=None, starting_after=None)
+
+        # Obtain the most probable language spoken and a list of other possible languages and their confidence scores
+        result = client.get_result_object(job.id)
+        print("Top Language : {}".format(result.top_language))
+        print("Language Confidences : {}".format([{
+            'language': language_confidence.language,
+            'confidence': language_confidence.confidence
+        } for language_confidence in result.language_confidences]))
+
+        break
+
+# Use the objects however you please
+# Once you are done with the job, you can delete it.
+# NOTE : This will PERMANENTLY DELETE all data related to a job. Exercise only
+# if you're sure you want to delete the job.
+#
+# client.delete_job(job.id)
+
+print("Job Submission and Collection Finished.")
diff --git a/src/rev_ai/__init__.py b/src/rev_ai/__init__.py
@@ -5,4 +5,4 @@
 
 from .models import Job, JobStatus, Account, Transcript, Monologue, Element, MediaConfig, \
     CaptionType, CustomVocabulary, TopicExtractionJob, TopicExtractionResult, Topic, Informant, \
-    SpeakerName
+    SpeakerName, LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
diff --git a/src/rev_ai/language_identification_client.py b/src/rev_ai/language_identification_client.py
@@ -0,0 +1,110 @@
+# -*- coding: utf-8 -*-
+"""Client used or interacting with our language identification api"""
+
+import json
+from .generic_api_client import GenericApiClient
+from .models import LanguageIdentificationJob, LanguageIdentificationResult
+
+try:
+    from urllib.parse import urljoin
+except ImportError:
+    from urlparse import urljoin
+
+
+class LanguageIdentificationClient(GenericApiClient):
+    """Client for interacting with the Rev AI language identification api"""
+
+    # Default version of Rev AI language identification api
+    api_version = 'v1'
+
+    # Default api name of Rev AI language identification api
+    api_name = 'languageid'
+
+    def __init__(self, access_token):
+        """Constructor; passes the language identification parsers to GenericApiClient
+
+        :param access_token: access token which authorizes all requests and links them to your
+                             account. Generated on the settings page of your account dashboard
+                             on Rev AI.
+        """
+
+        GenericApiClient.__init__(self, access_token, self.api_name, self.api_version,
+                                  LanguageIdentificationJob.from_json,
+                                  LanguageIdentificationResult.from_json)
+
+    def submit_job_url(
+            self,
+            media_url,
+            metadata=None,
+            callback_url=None,
+            delete_after_seconds=None):
+        """Submit media as a URL for language identification.
+        The audio data is downloaded from the URL.
+
+        :param media_url: web location of the media file
+        :param metadata: info to associate with the language identification job
+        :param callback_url: callback url to invoke on job completion as a webhook
+        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
+        :returns: result of self._submit_job, presumably a LanguageIdentificationJob (confirm)
+        :raises: ValueError if media_url is falsy, HTTPError
+        """
+        if not media_url:
+            raise ValueError('media_url must be provided')
+
+        payload = self._enhance_payload({'media_url': media_url},
+                                        metadata, callback_url, delete_after_seconds)
+
+        return self._submit_job(payload)
+
+    def submit_job_local_file(
+            self,
+            filename,
+            metadata=None,
+            callback_url=None,
+            delete_after_seconds=None):
+        """Submit a local file for language identification.
+        The file is opened in binary mode and posted as the 'media' multipart field.
+
+        :param filename: path to a local file on disk
+        :param metadata: info to associate with the language identification job
+        :param callback_url: callback url to invoke on job completion as a webhook
+        :param delete_after_seconds: number of seconds after job completion when job is auto-deleted
+        :returns: LanguageIdentificationJob parsed from the response json
+        :raises: ValueError if filename is falsy, HTTPError
+        """
+        if not filename:
+            raise ValueError('filename must be provided')
+
+        payload = self._enhance_payload({}, metadata, callback_url, delete_after_seconds)
+
+        with open(filename, 'rb') as f:
+            files = {
+                'media': (filename, f),
+                'options': (None, json.dumps(payload, sort_keys=True))
+            }
+
+            response = self._make_http_request(
+                "POST",
+                urljoin(self.base_url, 'jobs'),
+                files=files
+            )
+
+        return LanguageIdentificationJob.from_json(response.json())
+
+    def get_result_json(self, id_):
+        """Get result of a language identification job as json.
+
+        :param id_: id of job to be requested
+        :returns: job result data as raw json
+        :raises: HTTPError
+        """
+        return self._get_result_json(id_, {})
+
+    def get_result_object(self, id_):
+        """Get result of a language identification job as LanguageIdentificationResult object.
+
+        :param id_: id of job to be requested
+        :returns: job result data as LanguageIdentificationResult object
+        :raises: HTTPError
+        """
+        return self._get_result_object(id_, {})
diff --git a/src/rev_ai/models/__init__.py b/src/rev_ai/models/__init__.py
@@ -6,3 +6,4 @@
 from .asynchronous import Job, JobStatus, Account, Transcript, Monologue, Element, CaptionType, \
     SpeakerName
 from .insights import TopicExtractionJob, TopicExtractionResult, Topic, Informant
+from .language_id import LanguageIdentificationJob, LanguageIdentificationResult, LanguageConfidence
diff --git a/src/rev_ai/models/language_id/__init__.py b/src/rev_ai/models/language_id/__init__.py
@@ -0,0 +1,5 @@
+# -*- coding: utf-8 -*-
+"""Language Identification Models"""
+
+from .language_id_job import LanguageIdentificationJob
+from .language_id_result import LanguageIdentificationResult, LanguageConfidence
diff --git a/src/rev_ai/models/language_id/language_id_job.py b/src/rev_ai/models/language_id/language_id_job.py
@@ -0,0 +1,65 @@
+# -*- coding: utf-8 -*-
+"""Job model"""
+
+from ..asynchronous.job_status import JobStatus
+
+
+class LanguageIdentificationJob:
+    def __init__(
+            self, id_, created_on, status,
+            completed_on=None,
+            callback_url=None,
+            metadata=None,
+            media_url=None,
+            failure=None,
+            failure_detail=None,
+            processed_duration_seconds=None,
+            delete_after_seconds=None):
+        """
+        :param id_: unique id of job
+        :param created_on: date and time at which this job was started
+        :param status: current job status 'IN_PROGRESS', 'COMPLETED', or 'FAILED'
+        :param completed_on: date and time at which this job finished being processed
+        :param callback_url: callback_url if provided
+        :param metadata: metadata if provided
+        :param media_url: url of the submitted media if job was submitted this way
+        :param failure: type of failure if job has failed
+        :param failure_detail: more detailed failure message if job has failed
+        :param processed_duration_seconds: duration of file processed in seconds
+        :param delete_after_seconds: seconds before deletion if provided
+        """
+
+        self.id = id_
+        self.created_on = created_on
+        self.status = status
+        self.completed_on = completed_on
+        self.callback_url = callback_url
+        self.metadata = metadata
+        self.media_url = media_url
+        self.failure = failure
+        self.failure_detail = failure_detail
+        self.processed_duration_seconds = processed_duration_seconds
+        self.delete_after_seconds = delete_after_seconds
+
+    def __eq__(self, other):
+        """Equal when other is an instance of this class and all attributes match"""
+        if isinstance(other, self.__class__):
+            return self.__dict__ == other.__dict__
+        return False
+
+    @classmethod
+    def from_json(cls, json):
+        """Alternate constructor for parsed json; 'id', 'created_on', 'status' are required"""
+        return cls(
+            json['id'],
+            json['created_on'],
+            JobStatus.from_string(json['status']),
+            completed_on=json.get('completed_on'),
+            callback_url=json.get('callback_url'),
+            metadata=json.get('metadata'),
+            media_url=json.get('media_url'),
+            failure=json.get('failure'),
+            failure_detail=json.get('failure_detail'),
+            processed_duration_seconds=json.get('processed_duration_seconds'),
+            delete_after_seconds=json.get('delete_after_seconds'),
+        )
diff --git a/src/rev_ai/models/language_id/language_id_result.py b/src/rev_ai/models/language_id/language_id_result.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+"""Language identification result model"""
+
+
+class LanguageIdentificationResult:
+    """Result of a language identification job: top language and per-language confidences"""
+    def __init__(self, top_language, language_confidences):
+        """
+        :param top_language: Language code of predicted language
+        :param language_confidences: List of all potential languages with their corresponding
+            confidence scores
+        """
+        self.top_language = top_language
+        self.language_confidences = language_confidences
+
+    def __eq__(self, other):
+        """Equal when top_language and the complete language_confidences sequences match"""
+        if isinstance(other, self.__class__):
+            # Compare whole lists: zip() alone stops at the shorter one and hides a length mismatch
+            return self.top_language == other.top_language \
+                and list(self.language_confidences) == list(other.language_confidences)
+        return False
+
+    @classmethod
+    def from_json(cls, json):
+        """Alternate constructor for parsed json; a missing 'language_confidences' becomes []"""
+        return cls(
+            json['top_language'],
+            [LanguageConfidence.from_json(item) for item in json.get('language_confidences', [])])
+
+
+class LanguageConfidence:
+    def __init__(self, language, confidence):
+        """
+        :param language: Language code of predicted language
+        :param confidence: Confidence score of the predicted language, ranges from 0.00 to 1.00
+        """
+        self.language = language
+        self.confidence = confidence
+
+    def __eq__(self, other):
+        """Equal when other is an instance of this class with equal language and confidence"""
+        if isinstance(other, self.__class__):
+            return self.language == other.language and self.confidence == other.confidence
+        return False
+
+    @classmethod
+    def from_json(cls, json):
+        """Alternate constructor for parsed json; requires 'language' and 'confidence' keys"""
+        return cls(
+            json['language'],
+            json['confidence'])
diff --git a/tests/test_language_identification_client.py b/tests/test_language_identification_client.py