diff --git a/.gitignore b/.gitignore index 120928f..6a1275a 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ nosetests.xml .settings .project .pydevproject +.idea # Vim *.s[a-w][a-z] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bbf916a..7676a02 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,60 @@ Changelog ========= +v0.8.5 (2020-11-10) +------------------- +[#220] Use connection create time to determine stale connections [Kaivan Kamali] + +v0.8.4 (2020-10-19) +------------------- +[#221] fix tests which were failing in Py3.4 and 3.7 [Daniel Moore] +[#220] Replace stale connections pulled from idle pools [Kaivan Kamali] +[#3] tests failing on Python3 unicode defaults [Daniel Moore] +[#214] store/load rules as utf-8 in files [Daniel Moore] +[#211] set and report application name to server [Daniel Moore] +[#156] skip ssh/pam login tests if user doesn't exist [Daniel Moore] +[#209] pam/ssl/env auth tests imported from test harness [Daniel Moore] +[#209] store hashed PAM pw [Daniel Moore] +[#205] Disallow PAM plaintext passwords as strong default [Daniel Moore] +[#156] fix the PAM authentication with env json file. [Patrice Linel] +[#207] add raw-acl permissions getter [Daniel Moore] + +v0.8.3 (2020-06-05) +------------------- +- [#3] remove order sensitivity in test_user_dn [Daniel Moore] +- [#5] clarify unlink specific replica example [Terrell Russell] +- [irods/irods#4796] add data object copy tests [Daniel Moore] +- [#5] Additional sections and examples in README [Daniel Moore] +- [#187] Allow query on metadata create and modify times [Daniel Moore] +- [#135] fix queries for multiple AVUs of same name [Daniel Moore] +- [#135] Allow multiple criteria based on column name [Daniel Moore] +- [#180] add the "in" genquery operator [Daniel Moore] +- [#183] fix key error when tables from order_by() not in query() [Daniel Moore] +- [#5] fix ssl example in README.rst [Terrell Russell] + +v0.8.2 (2019-11-13) +------------------- +- [#8] Add PAM Authentication handling (still needs tests) [Mattia D'Antonio] +- [#5] Remove commented-out import [Alan King] +- [#5] Add .idea directory to .gitignore [Jonathan Landrum] +- [#150] Fix specific query argument labeling [Chris Klimowski] +- [#148] DataObjectManager.put() can return the new data_object [Jonathan Landrum] +- [#124] Convert strings going to irods to Unicode [Alan King] +- [#161] Allow dynamic I/O for rule from file [Mathijs Koymans] +- [#162] Include resc_hier in replica information [Brett Hartley] +- [#165] Fix CAT_STATEMENT_TABLE_FULL by auto closing queries [Chris Smeele] +- [#166] Test freeing statements in unfinished query [Daniel Moore] +- [#167] Add metadata for user and usergroup objects [Erwin van Wieringen] +- [#175] Add metadata property for instances of iRODSResource [Daniel Moore] +- [#163] add keywords to query objects [Daniel Moore] + +v0.8.1 (2018-09-27) +------------------- +- [#140] Remove randomization from password test [Alan King] +- [#139] Use uppercase queries in tests [Alan King] +- [#137] Handle filenames with ampersands [Alan King] +- [#126] Add size attribute to iRODSReplica [Alan King] + v0.8.0 (2018-05-03) ------------------- - Add rescName and replNum awareness. [Hao Xu] diff --git a/README.rst b/README.rst index 4c2698c..fbc85e8 100644 --- a/README.rst +++ b/README.rst @@ -2,39 +2,39 @@ Python iRODS Client (PRC) ========================= -`iRODS `_ is an open source distributed data management system. This is a client API implemented in python. 
+`iRODS `_ is an open source distributed data management system. This is a client API implemented in Python. Currently supported: -- Establish a connection to iRODS, authenticate -- Implement basic Gen Queries (select columns and filtering) -- Support more advanced Gen Queries with limits, offsets, and aggregations +- Python 2.7, 3.4 or newer +- Establish a connection to iRODS +- Authenticate via password, GSI, PAM +- iRODS connection over SSL +- Implement basic GenQueries (select columns and filtering) +- Support more advanced GenQueries with limits, offsets, and aggregations - Query the collections and data objects within a collection - Execute direct SQL queries - Execute iRODS rules - Support read, write, and seek operations for files - PUT/GET data objects -- Create data objects -- Delete data objects - Create collections +- Rename collections - Delete collections +- Create data objects - Rename data objects -- Rename collections +- Delete data objects - Register files and directories - Query metadata for collections and data objects - Add, edit, remove metadata - Replicate data objects to different resource servers - Connection pool management -- Implement gen query result sets as lazy queries +- Implement GenQuery result sets as lazy queries - Return empty result sets when CAT_NO_ROWS_FOUND is raised - Manage permissions - Manage users and groups - Manage resources -- GSI authentication - Unicode strings - Ticket based access -- iRODS connection over SSL -- Python 2.7, 3.4 or newer Installing @@ -58,19 +58,22 @@ Uninstalling pip uninstall python-irodsclient -Establishing a connection -------------------------- +Establishing a (secure) connection +---------------------------------- -Using environment files in ``~/.irods/``: +Using environment files (including any SSL settings) in ``~/.irods/``: >>> import os +>>> import ssl >>> from irods.session import iRODSSession >>> try: ... env_file = os.environ['IRODS_ENVIRONMENT_FILE'] ... except KeyError: ... env_file = os.path.expanduser('~/.irods/irods_environment.json') ... ->>> with iRODSSession(irods_env_file=env_file) as session: +>>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) +>>> ssl_settings = {'ssl_context': ssl_context} +>>> with iRODSSession(irods_env_file=env_file, **ssl_settings) as session: ... pass ... >>> @@ -184,26 +187,94 @@ bar Working with metadata --------------------- +To enumerate AVU's on an object. With no metadata attached, the result is an empty list: + + +>>> from irods.meta import iRODSMeta >>> obj = session.data_objects.get("/tempZone/home/rods/test1") >>> print(obj.metadata.items()) [] + +We then add some metadata. +Just as with the icommand equivalent "imeta add ...", we can add multiple AVU's with the same name field: + + >>> obj.metadata.add('key1', 'value1', 'units1') >>> obj.metadata.add('key1', 'value2') >>> obj.metadata.add('key2', 'value3') +>>> obj.metadata.add('key2', 'value4') +>>> print(obj.metadata.items()) +[, , +, ] + + +We can also use Python's item indexing syntax to perform the equivalent of an "imeta set ...", e.g. 
overwriting +all AVU's with a name field of "key2" in a single update: + + +>>> new_meta = iRODSMeta('key2','value5','units2') +>>> obj.metadata[new_meta.name] = new_meta >>> print(obj.metadata.items()) -[, , -] +[, , + ] + + +Now, with only one AVU on the object with a name of "key2", *get_one* is assured of not throwing an exception: ->>> print(obj.metadata.get_all('key1')) -[, ] >>> print(obj.metadata.get_one('key2')) - + + + +However, the same is not true of "key1": + + +>>> print(obj.metadata.get_one('key1')) +Traceback (most recent call last): + File "", line 1, in + File "/[...]/python-irodsclient/irods/meta.py", line 41, in get_one + raise KeyError +KeyError + + +Finally, to remove a specific AVU from an object: + >>> obj.metadata.remove('key1', 'value1', 'units1') >>> print(obj.metadata.items()) -[, ] +[, ] + + +Alternately, this form of the remove() method can also be useful: + + +>>> for avu in obj.metadata.items(): +... obj.metadata.remove(avu) +>>> print(obj.metadata.items()) +[] + + +If we intended on deleting the data object anyway, we could have just done this instead: + + +>>> obj.unlink(force=True) + + +But notice that the force option is important, since a data object in the trash may still have AVU's attached. + +At the end of a long session of AVU add/manipulate/delete operations, one should make sure to delete all unused +AVU's. We can in fact use any *\*Meta* data model in the queries below, since unattached AVU's are not aware +of the (type of) catalog object they once annotated: + + +>>> from irods.models import (DataObjectMeta, ResourceMeta) +>>> len(list( session.query(ResourceMeta) )) +4 +>>> from irods.test.helpers import remove_unused_metadata +>>> remove_unused_metadata(session) +>>> len(list( session.query(ResourceMeta) )) +0 General queries @@ -235,6 +306,31 @@ General queries /tempZone/home/rods/manager/user_manager.py id=212669 size=5509 /tempZone/home/rods/manager/user_manager.pyc id=212658 size=5233 +Query using other models: + +>>> from irods.column import Criterion +>>> from irods.models import DataObject, DataObjectMeta, Collection, CollectionMeta +>>> from irods.session import iRODSSession +>>> import os +>>> env_file = os.path.expanduser('~/.irods/irods_environment.json') +>>> with iRODSSession(irods_env_file=env_file) as session: +... # by metadata +... # equivalent to 'imeta qu -C type like Project' +... results = session.query(Collection, CollectionMeta).filter( \ +... Criterion('=', CollectionMeta.name, 'type')).filter( \ +... Criterion('like', CollectionMeta.value, '%Project%')) +... for r in results: +... print(r[Collection.name], r[CollectionMeta.name], r[CollectionMeta.value], r[CollectionMeta.units]) +... +('/tempZone/home/rods', 'type', 'Project', None) + +Beginning with version 0.8.3 of PRC, the 'in' genquery operator is also available: + +>>> from irods.models import Resource +>>> from irods.column import In +>>> [ resc[Resource.id]for resc in session.query(Resource).filter(In(Resource.name, ['thisResc','thatResc'])) ] +[10037,10038] + Query with aggregation(min, max, sum, avg, count): >>> with iRODSSession(irods_env_file=env_file) as session: @@ -294,6 +390,7 @@ user_manager.py 212669 __init__.py 212670 __init__.pyc 212671 + Recherché queries ----------------- @@ -318,6 +415,250 @@ not reside in the trash. 
>>> pprint( list( chained_results ) ) +Instantiating iRODS objects from query results +---------------------------------------------- +The General query works well for getting information out of the ICAT if all we're interested in is +information representable with +primitive types (ie. object names, paths, and ID's, as strings or integers). But Python's object orientation also +allows us to create object references to mirror the persistent entities (instances of *Collection*, *DataObject*, *User*, or *Resource*, etc.) +inhabiting the ICAT. + +**Background:** +Certain iRODS object types can be instantiated easily using the session object's custom type managers, +particularly if some parameter (often just the name or path) of the object is already known: + +>>> type(session.users) + +>>> u = session.users.get('rods') +>>> u.id +10003 + +Type managers are good for specific operations, including object creation and removal:: + +>>> session.collections.create('/tempZone/home/rods/subColln') +>>> session.collections.remove('/tempZone/home/rods/subColln') +>>> session.data_objects.create('/tempZone/home/rods/dataObj') +>>> session.data_objects.unlink('/tempZone/home/rods/dataObj') + +When we retrieve a reference to an existing collection using *get* : + +>>> c = session.collections.get('/tempZone/home/rods') +>>> c + + + +we have, in that variable *c*, a reference to an iRODS *Collection* object whose properties provide +useful information: + +>>> [ x for x in dir(c) if not x.startswith('__') ] +['_meta', 'data_objects', 'id', 'manager', 'metadata', 'move', 'name', 'path', 'remove', 'subcollections', 'unregister', 'walk'] +>>> c.name +'rods' +>>> c.path +'/tempZone/home/rods' +>>> c.data_objects +[] +>>> c.metadata.items() +[ <... list of AVU's attached to Collection c ... > ] + +or whose methods can do useful things: + +>>> for sub_coll in c.walk(): print('---'); pprint( sub_coll ) +[ ...< series of Python data structures giving the complete tree structure below collection 'c'> ...] + +This approach of finding objects by name, or via their relations with other objects (ie "contained by", or in the case of metadata, "attached to"), +is helpful if we know something about the location or identity of what we're searching for, but we don't always +have that kind of a-priori knowledge. + +So, although we can (as seen in the last example) walk an *iRODSCollection* recursively to discover all subordinate +collections and their data objects, this approach will not always be best +for a given type of application or data discovery, especially in more advanced +use cases. + +**A Different Approach:** +For the PRC to be sufficiently powerful for general use, we'll often need at least: + +* general queries, and +* the capabilities afforded by the PRC's object-relational mapping. + +Suppose, for example, we wish to enumerate all collections in the iRODS catalog. + +Again, the object managers are the answer, but they are now invoked using a different scheme: + +>>> from irods.collection import iRODSCollection; from irods.models import Collection +>>> all_collns = [ iRODSCollection(session.collections,result) for result in session.query(Collection) ] + +From there, we have the ability to do useful work, or filtering based on the results of the enumeration. +And, because *all_collns* is an iterable of true objects, we can either use Python's list comprehensions or +execute more catalog queries to achieve further aims. 
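+As a quick illustration (a minimal sketch only, assuming the *session* and *all_collns* objects built
+above, and a home-collection prefix that may differ on your zone), a plain list comprehension narrows
+the enumeration, after which each matching collection can be asked for its own contents:
+
+>>> home_collns = [ c for c in all_collns if c.path.startswith('/tempZone/home/') ]
+>>> for c in home_collns:
+...     print( c.path, len(c.data_objects) )
+
+Since accessing *data_objects* consults the catalog for each collection, a single general query may
+still be the more economical route when the number of collections is large.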
+ +Note that, for similar system-wide queries of Data Objects (which, as it happens, are inextricably joined to their +parent Collection objects), a bit more finesse is required. Let us query, for example, to find all data +objects in a particular zone with an AVU that matches the following condition:: + + META_DATA_ATTR_NAME = "irods::alert_time" and META_DATA_ATTR_VALUE like '+0%' + + +>>> import irods.keywords +>>> from irods.data_object import iRODSDataObject +>>> from irods.models import DataObjectMeta, DataObject +>>> from irods.column import Like +>>> q = session.query(DataObject).filter( DataObjectMeta.name == 'irods::alert_time', + Like(DataObjectMeta.value, '+0%') ) +>>> zone_hint = "" # --> add a zone name in quotes to search another zone +>>> if zone_hint: q = q.add_keyword( irods.keywords.ZONE_KW, zone_hint ) +>>> for res in q: +... colln_id = res [DataObject.collection_id] +... collObject = get_collection( colln_id, session, zone = zone_hint) +... dataObject = iRODSDataObject( session.data_objects, parent = collObject, results=[res]) +... print( '{coll}/{data}'.format (coll = collObject.path, data = dataObject.name)) + + +In the above loop we have used a helper function, *get_collection*, to minimize the number of hits to the object +catalog. Otherwise, we might find within a typical application that some Collection objects are being queried at +a high rate of redundancy. *get_collection* can be implemented thusly: + +.. code:: Python + + import collections # of the Pythonic, not iRODS, kind + def makehash(): + # see https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python + return collections.defaultdict(makehash) + from irods.collection import iRODSCollection + from irods.models import Collection + import irods.keywords + def get_collection (Id, session, zone=None, memo = makehash()): + if not zone: zone = "" + c_obj = memo[session][zone].get(Id) + if c_obj is None: + q = session.query(Collection).filter(Collection.id==Id) + if zone != '': q = q.add_keyword( irods.keywords.ZONE_KW, zone ) + c_id = q.one() + c_obj = iRODSCollection(session.collections, result = c_id) + memo[session][zone][Id] = c_obj + return c_obj + + +Once instantiated, of course, any *iRODSDataObject*'s data to which we have access permissions is available via its open() method. + +As stated, this type of object discovery requires some extra study and effort, but the ability to search arbitrary iRODS zones +(to which we are federated and in which we have user permissions) is powerful indeed. + + +Tracking and manipulating replicas of Data objects +-------------------------------------------------- + +Putting together the techniques we've seen so far, it's not hard to write functions +that achieve useful, common goals. Suppose that for all data objects containing replicas on +a given named resource (the "source") we want those replicas "moved" to a second, or +"destination" resource. We can achieve it with a function such as the one below. It +achieves the move via a replication of the data objects found to the destination +resource, followed by a trimming of each replica from the source.
We assume for our current +purposes that all replicas are "good", ie have a status of "1" :: + + from irods.resource import iRODSResource + from irods.collection import iRODSCollection + from irods.data_object import iRODSDataObject + from irods.models import Resource,Collection,DataObject + from irods import keywords as kw + def repl_and_trim (srcRescName, dstRescName = '', verbose = False): + objects_trimmed = 0 + q = session.query(Resource).filter(Resource.name == srcRescName) + srcResc = iRODSResource( session.resources, q.one()) + # loop over data objects found on srcResc + for q_row in session.query(Collection,DataObject) \ + .filter(DataObject.resc_id == srcResc.id): + collection = iRODSCollection (session.collections, result = q_row) + data_object = iRODSDataObject (session.data_objects, parent = collection, results = (q_row,)) + if verbose : + import pprint + print( '--------', data_object.name, '--------') + pprint.pprint( [vars(r) for r in data_object.replicas if + r.resource_name == srcRescName] ) + if dstRescName: + objects_trimmed += 1 + data_object.replicate(dstRescName) + for replica_number in [r.number for r in data_object.replicas]: + options = { kw.DATA_REPL_KW: replica_number } + data_object.unlink( **options ) + return objects_trimmed + + +Listing Users and Groups ; calculating Group Membership +------------------------------------------------------- + +iRODS tracks groups and users using two tables, R_USER_MAIN and R_USER_GROUP. +Under this database schema, all "user groups" are also users: + +>>> from irods.models import User, UserGroup +>>> from pprint import pprint +>>> pprint(list( [ (x[User.id], x[User.name]) for x in session.query(User) ] )) +[(10048, 'alice'), + (10001, 'rodsadmin'), + (13187, 'bobby'), + (10045, 'collab'), + (10003, 'rods'), + (13193, 'empty'), + (10002, 'public')] + +But it's also worth noting that the User.type field will be 'rodsgroup' for any +user ID that iRODS internally recognizes as a "Group": + +>>> groups = session.query(User).filter( User.type == 'rodsgroup' ) + +>>> [x[User.name] for x in groups] +['collab', 'public', 'rodsadmin', 'empty'] + +Since we can instantiate iRODSUserGroup and iRODSUser objects directly from the rows of +a general query on the corresponding tables, it is also straightforward to trace out +the groups' memberships: + +>>> from irods.user import iRODSUser, iRODSUserGroup +>>> grp_usr_mapping = [ (iRODSUserGroup ( session.user_groups, result), iRODSUser (session.users, result)) \ +... for result in session.query(UserGroup,User) ] +>>> pprint( [ (x,y) for x,y in grp_usr_mapping if x.id != y.id ] ) +[(, ), + (, ), + (, ), + (, ), + (, ), + (, )] + +(Note that in general queries, fields cannot be compared to each other, only to literal constants; thus +the '!=' comparison in the Python list comprehension.) + +From the above, we can see that the group 'collab' (with user ID 10045) contains users 'bobby'(13187) and +'alice'(10048) but not 'rods'(10003), as the tuple (10045,10003) is not listed. Group 'rodsadmin'(10001) +contains user 'rods'(10003) but no other users; and group 'public'(10002) by default contains all canonical +users (those whose User.type is 'rodsadmin' or 'rodsuser'). The empty group ('empty') has no users as +members, so it doesn't show up in our final list.
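+If the membership mapping is something we expect to reuse, the same query can be folded into a small
+helper function. The following is only a sketch (it assumes the *session* object from the examples
+above); it returns an ordinary dictionary mapping each group name to a list of its members' names:
+
+.. code:: Python
+
+    from collections import defaultdict
+    from irods.models import User, UserGroup
+
+    def group_membership(session):
+        # Pair every group row with every member row, skipping the row in which
+        # the group appears as a "member" of itself (same catalog ID).
+        members = defaultdict(list)
+        for row in session.query(UserGroup, User):
+            if row[UserGroup.id] != row[User.id]:
+                members[row[UserGroup.name]].append(row[User.name])
+        return dict(members)
+
+Given the catalog contents shown above, group_membership(session)['collab'] would be expected to
+contain 'alice' and 'bobby'.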
+ + +Getting and setting permissions +------------------------------- + +We can find the ID's of all the collections writable (ie having "modify" ACL) by, but not owned by, +alice (or even alice#otherZone): + +>>> from irods.models import Collection,CollectionAccess,CollectionUser,User +>>> from irods.column import Like +>>> q = session.query (Collection,CollectionAccess).filter( +... CollectionUser.name == 'alice', # User.zone == 'otherZone', # zone optional +... Like(CollectionAccess.name, 'modify%') ) #defaults to current zone + +If we then want to downgrade those permissions to read-only, we can do the following: + +>>> from irods.access import iRODSAccess +>>> for c in q: +... session.permissions.set( iRODSAccess('read', c[Collection.name], 'alice', # 'otherZone' # zone optional +... )) + +We can also query on access type using its numeric value, which will seem more natural to some: + +>>> OWN = 1200; MODIFY = 1120 ; READ = 1050 +>>> from irods.models import DataAccess, DataObject, User +>>> data_objects_writable = list(session.query(DataObject,DataAccess,User).filter(User.name=='alice', DataAccess.type >= MODIFY)) + + And more... ----------- diff --git a/irods/__init__.py b/irods/__init__.py index 4f43c19..7520648 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -10,8 +10,16 @@ MAX_SQL_ROWS = 256 DEFAULT_CONNECTION_TIMEOUT = 120 -# Other variables AUTH_SCHEME_KEY = 'a_scheme' +AUTH_USER_KEY = 'a_user' +AUTH_PWD_KEY = 'a_pw' +AUTH_TTL_KEY = 'a_ttl' + +NATIVE_AUTH_SCHEME = 'native' + GSI_AUTH_PLUGIN = 'GSI' GSI_AUTH_SCHEME = GSI_AUTH_PLUGIN.lower() GSI_OID = "1.3.6.1.4.1.3536.1.1" # taken from http://j.mp/2hDeczm + +PAM_AUTH_PLUGIN = 'PAM' +PAM_AUTH_SCHEME = PAM_AUTH_PLUGIN.lower() diff --git a/irods/column.py b/irods/column.py index f4f644f..cfed46b 100644 --- a/irods/column.py +++ b/irods/column.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +import six from datetime import datetime from calendar import timegm @@ -38,6 +39,20 @@ def __init__(self, op, query_key, value): def value(self): return self.query_key.column_type.to_irods(self._value) +class In(Criterion): + + def __init__(self, query_key, value): + super(In, self).__init__('in', query_key, value) + + @property + def value(self): + v = "(" + comma = "" + for element in self._value: + v += "{}'{}'".format(comma,element) + comma = "," + v += ")" + return v class Like(Criterion): @@ -113,6 +128,12 @@ def to_python(string): @staticmethod def to_irods(data): + try: + # Convert to Unicode string (aka decode) + data = six.text_type(data, 'utf-8', 'replace') + except TypeError: + # Some strings are already Unicode so they do not need decoding + pass return u"'{}'".format(data) diff --git a/irods/connection.py b/irods/connection.py index 8d791ea..e3a8728 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -4,19 +4,22 @@ import struct import hashlib import six -import struct import os import ssl import hashlib +import datetime from irods.message import ( - iRODSMessage, StartupPack, AuthResponse, AuthChallenge, + iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, OpenedDataObjRequest, FileSeekResponse, StringStringMap, VersionResponse, - GSIAuthMessage, OpenIDAuthMessage, ClientServerNegotiation, Error) + OpenIDAuthMessage, ClientServerNegotiation, Error, PluginAuthMessage) from irods.exception import get_exception_by_code, NetworkException from irods import ( MAX_PASSWORD_LENGTH, RESPONSE_LEN, - AUTH_SCHEME_KEY, GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID) + AUTH_SCHEME_KEY,
AUTH_USER_KEY, AUTH_PWD_KEY, AUTH_TTL_KEY, + NATIVE_AUTH_SCHEME, + GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID, + PAM_AUTH_SCHEME) from irods.client_server_negotiation import ( perform_negotiation, validate_policy, @@ -46,8 +49,12 @@ def __init__(self, URL): def is_str(s): return isinstance(s, basestring) +class PlainTextPAMPasswordError(Exception): pass + class Connection(object): + DISALLOWING_PAM_PLAINTEXT = True + def __init__(self, pool, account, block_on_authURL=True): self.pool = pool @@ -59,15 +66,19 @@ def __init__(self, pool, account, block_on_authURL=True): scheme = self.account.authentication_scheme - if scheme == 'native': + if scheme == NATIVE_AUTH_SCHEME: self._login_native() - elif scheme == 'gsi': + elif scheme == GSI_AUTH_SCHEME: self.client_ctx = None self._login_gsi() elif scheme == 'openid': self._login_openid() + elif scheme == PAM_AUTH_SCHEME: + self._login_pam() else: raise ValueError("Unknown authentication scheme %s" % scheme) + self.create_time = datetime.datetime.now() + self.last_used_time = self.create_time @property def server_version(self): @@ -208,9 +219,11 @@ def _connect(self): "{}:{}".format(*address)) self.socket = s + main_message = StartupPack( (self.account.proxy_user, self.account.proxy_zone), - (self.account.client_user, self.account.client_zone) + (self.account.client_user, self.account.client_zone), + self.pool.application_name ) # No client-server negotiation @@ -353,9 +366,10 @@ def handshake(self, target): def gsi_client_auth_request(self): # Request for authentication with GSI on current user - message_body = GSIAuthMessage( + + message_body = PluginAuthMessage( auth_scheme_=GSI_AUTH_PLUGIN, - context_='a_user=%s' % self.account.client_user + context_='%s=%s' % (AUTH_USER_KEY, self.account.client_user) ) # GSI = 1201 # https://github.com/irods/irods/blob/master/lib/api/include/apiNumber.h#L158 @@ -520,6 +534,48 @@ def read_msg(sock): # no point trying an auth reponse if it failed logger.error('Did not complete OpenID authentication flow') + def _login_pam(self): + + ctx_user = '%s=%s' % (AUTH_USER_KEY, self.account.client_user) + ctx_pwd = '%s=%s' % (AUTH_PWD_KEY, self.account.password) + ctx_ttl = '%s=%s' % (AUTH_TTL_KEY, "60") + + ctx = ";".join([ctx_user, ctx_pwd, ctx_ttl]) + + if type(self.socket) is socket.socket: + if getattr(self,'DISALLOWING_PAM_PLAINTEXT',True): + raise PlainTextPAMPasswordError + + message_body = PluginAuthMessage( + auth_scheme_=PAM_AUTH_SCHEME, + context_=ctx + ) + + auth_req = iRODSMessage( + msg_type='RODS_API_REQ', + msg=message_body, + # int_info=725 + int_info=1201 + ) + + self.send(auth_req) + # Getting the new password + output_message = self.recv() + + auth_out = output_message.get_main_message(AuthPluginOut) + + self.disconnect() + self._connect() + + if hasattr(self.account,'store_pw'): + drop = self.account.store_pw + if type(drop) is list: + drop[:] = [ auth_out.result_ ] + + self._login_native(password=auth_out.result_) + + logger.info("PAM authorization validated") + def read_file(self, desc, size=-1, buffer=None): if size < 0: size = len(buffer) @@ -547,7 +603,11 @@ def read_file(self, desc, size=-1, buffer=None): return response.bs - def _login_native(self): + def _login_native(self, password=None): + + # Default case, PAM login will send a new password + if password is None: + password = self.account.password # authenticate auth_req = iRODSMessage(msg_type='RODS_API_REQ', int_info=703) @@ -569,11 +629,11 @@ def _login_native(self): if six.PY3: challenge = challenge.strip() padded_pwd = struct.pack( 
- "%ds" % MAX_PASSWORD_LENGTH, self.account.password.encode( + "%ds" % MAX_PASSWORD_LENGTH, password.encode( 'utf-8').strip()) else: padded_pwd = struct.pack( - "%ds" % MAX_PASSWORD_LENGTH, self.account.password) + "%ds" % MAX_PASSWORD_LENGTH, password) m = hashlib.md5() m.update(challenge) diff --git a/irods/data_object.py b/irods/data_object.py index c0a336c..2bc823e 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -23,11 +23,12 @@ def irods_basename(path): class iRODSReplica(object): - def __init__(self, number, status, resource_name, path, **kwargs): + def __init__(self, number, status, resource_name, path, resc_hier, **kwargs): self.number = number self.status = status self.resource_name = resource_name self.path = path + self.resc_hier = resc_hier for key, value in kwargs.items(): setattr(self, key, value) @@ -61,7 +62,9 @@ def __init__(self, manager, parent=None, results=None): r[DataObject.replica_status], r[DataObject.resource_name], r[DataObject.path], - checksum=r[DataObject.checksum] + r[DataObject.resc_hier], + checksum=r[DataObject.checksum], + size=r[DataObject.size] ) for r in replicas] self._meta = None diff --git a/irods/exception.py b/irods/exception.py index a1976a1..74cb9c8 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -4,6 +4,8 @@ from __future__ import absolute_import import six + + class PycommandsException(Exception): pass @@ -24,6 +26,10 @@ class CollectionDoesNotExist(DoesNotExist): pass +class ZoneDoesNotExist(DoesNotExist): + pass + + class UserDoesNotExist(DoesNotExist): pass @@ -1131,6 +1137,9 @@ class CAT_TABLE_ACCESS_DENIED(CatalogLibraryException): class CAT_UNKNOWN_SPECIFIC_QUERY(CatalogLibraryException): code = -853000 +class CAT_STATEMENT_TABLE_FULL(CatalogLibraryException): + code = -860000 + class RDSException(iRODSException): pass @@ -1874,3 +1883,23 @@ class PHP_REQUEST_STARTUP_ERR(PHPException): class PHP_OPEN_SCRIPT_FILE_ERR(PHPException): code = -1602000 + + +class PAMException(iRODSException): + pass + + +class PAM_AUTH_NOT_BUILT_INTO_CLIENT(PAMException): + code = -991000 + + +class PAM_AUTH_NOT_BUILT_INTO_SERVER(PAMException): + code = -992000 + + +class PAM_AUTH_PASSWORD_FAILED(PAMException): + code = -993000 + + +class PAM_AUTH_PASSWORD_INVALID_TTL(PAMException): + code = -994000 diff --git a/irods/keywords.py b/irods/keywords.py index d70ac24..6880bfe 100644 --- a/irods/keywords.py +++ b/irods/keywords.py @@ -13,7 +13,7 @@ RESC_NAME_KW = "rescName" # resource name # DEST_RESC_NAME_KW = "destRescName" # destination resource name # DEF_RESC_NAME_KW = "defRescName" # default resource name # -BACKUP_RESC_NAME_KW = "backupRescName" # destination resource name # +BACKUP_RESC_NAME_KW = "backupRescName" # backup resource name # DATA_TYPE_KW = "dataType" # data type # DATA_SIZE_KW = "dataSize" CHKSUM_KW = "chksum" diff --git a/irods/manager/access_manager.py b/irods/manager/access_manager.py index 8276e5d..20ec57e 100644 --- a/irods/manager/access_manager.py +++ b/irods/manager/access_manager.py @@ -4,20 +4,38 @@ from irods.manager import Manager from irods.api_number import api_number from irods.message import ModAclRequest, iRODSMessage -from irods.data_object import iRODSDataObject +from irods.data_object import ( iRODSDataObject, irods_dirname, irods_basename ) from irods.collection import iRODSCollection -from irods.models import ( - DataObject, Collection, User, DataAccess, CollectionAccess, CollectionUser) +from irods.models import ( DataObject, Collection, User, CollectionUser, + DataAccess, CollectionAccess ) 
from irods.access import iRODSAccess +from irods.column import In +from irods.user import iRODSUser +import six import logging logger = logging.getLogger(__name__) +def users_by_ids(session,ids=()): + try: + ids=list(iter(ids)) + except TypeError: + if type(ids) in (str,) + six.integer_types: ids=int(ids) + else: raise + cond = () if not ids \ + else (In(User.id,list(map(int,ids))),) if len(ids)>1 \ + else (User.id == int(ids[0]),) + return [ iRODSUser(session.users,i) + for i in session.query(User.id,User.name,User.type,User.zone).filter(*cond) ] class AccessManager(Manager): - def get(self, target): + def get(self, target, report_raw_acls = False, **kw): + + if report_raw_acls: + return self.__get_raw(target, **kw) # prefer a behavior consistent with 'ils -A` + # different query whether target is an object or a collection if type(target) == iRODSDataObject: access_type = DataAccess @@ -45,6 +63,56 @@ def get(self, target): user_zone=row[user_type.zone] ) for row in results] + def coll_access_query(self,path): + return self.sess.query(Collection, CollectionAccess).filter(Collection.name == path) + + def data_access_query(self,path): + cn = irods_dirname(path) + dn = irods_basename(path) + return self.sess.query(DataObject, DataAccess).filter( Collection.name == cn, DataObject.name == dn ) + + def __get_raw(self, target, **kw): + + ### sample usage: ### + # + # user_id_list = [] # simply to store the user id's from the discovered ACL's + # session.permissions.get( data_or_coll_target, report_raw_acls = True, + # acl_users = user_id_list, + # acl_users_transform = lambda u: u.id) + # + # -> returns list of iRODSAccess objects mapping one-to-one with ACL's stored in the catalog + + users_out = kw.pop( 'acl_users', None ) + T = kw.pop( 'acl_users_transform', lambda value : value ) + + # different choice of query based on whether target is an object or a collection + if isinstance(target, iRODSDataObject): + access_column = DataAccess + query_func = self.data_access_query + + elif isinstance(target, iRODSCollection): + access_column = CollectionAccess + query_func = self.coll_access_query + else: + raise TypeError + + rows = [ r for r in query_func(target.path) ] + userids = set( r[access_column.user_id] for r in rows ) + + user_lookup = { j.id:j for j in users_by_ids(self.sess, userids) } + + if isinstance(users_out, dict): users_out.update (user_lookup) + elif isinstance (users_out, list): users_out += [T(v) for v in user_lookup.values()] + elif isinstance (users_out, set): users_out |= set(T(v) for v in user_lookup.values()) + elif users_out is None: pass + else: raise TypeError + + acls = [ iRODSAccess ( r[access_column.name], + target.path, + user_lookup[r[access_column.user_id]].name, + user_lookup[r[access_column.user_id]].zone ) for r in rows ] + return acls + def set(self, acl, recursive=False, admin=False): prefix = 'admin:' if admin else '' diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 9ceaee8..3c5bf11 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -52,13 +52,14 @@ def get(self, path, file=None, **options): .filter(DataObject.name == irods_basename(path))\ .filter(DataObject.collection_id == parent.id)\ .add_keyword(kw.ZONE_KW, path.split('/')[1]) + results = query.all() # get up to max_rows replicas if len(results) <= 0: raise ex.DataObjectDoesNotExist() return iRODSDataObject(self, parent, results) - def put(self, file, irods_path, **options): + def put(self, file, irods_path, 
return_data_object=False, **options): if irods_path.endswith('/'): obj = irods_path + os.path.basename(file) else: @@ -76,8 +77,11 @@ def put(self, file, irods_path, **options): options[kw.UPDATE_REPL_KW] = '' self.replicate(obj, **options) + if return_data_object: + return self.get(obj) + - def create(self, path, resource=None, **options): + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' if resource: @@ -89,6 +93,9 @@ def create(self, path, resource=None, **options): except AttributeError: pass + if force: + options[kw.FORCE_FLAG_KW] = '' + message_body = FileOpenRequest( objPath=path, createMode=0o644, diff --git a/irods/manager/user_manager.py b/irods/manager/user_manager.py index 3bdc1b4..e0c107c 100644 --- a/irods/manager/user_manager.py +++ b/irods/manager/user_manager.py @@ -1,10 +1,5 @@ from __future__ import absolute_import import logging -import six -if six.PY3: - from html import escape -else: - from cgi import escape from irods.models import User, UserGroup from irods.manager import Manager @@ -35,7 +30,8 @@ def create(self, user_name, user_type, user_zone="", auth_str=""): message_body = GeneralAdminRequest( "add", "user", - user_name, + user_name if not user_zone or user_zone == self.sess.zone \ + else "{}#{}".format(user_name,user_zone), user_type, user_zone, auth_str @@ -75,11 +71,6 @@ def modify(self, user_name, option, new_value, user_zone=""): current_password = self.sess.pool.account.password new_value = obf.obfuscate_new_password(new_value, current_password, conn.client_signature) - # html style escaping might have to be generalized: - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1913 - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1331-L1368 - new_value = escape(new_value, quote=False) - message_body = GeneralAdminRequest( "modify", "user", diff --git a/irods/manager/zone_manager.py b/irods/manager/zone_manager.py new file mode 100644 index 0000000..f6416c2 --- /dev/null +++ b/irods/manager/zone_manager.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import +import logging + +from irods.models import Zone +from irods.zone import iRODSZone +from irods.manager import Manager +from irods.message import GeneralAdminRequest, iRODSMessage +from irods.api_number import api_number +from irods.exception import ZoneDoesNotExist, NoResultFound + +logger = logging.getLogger(__name__) + +class ZoneManager(Manager): + + def get(self, zone_name): + query = self.sess.query(Zone).filter(Zone.name == zone_name) + + try: + result = query.one() + except NoResultFound: + raise ZoneDoesNotExist() + return iRODSZone(self, result) + + def create(self, zone_name, zone_type): + message_body = GeneralAdminRequest( + "add", + "zone", + zone_name, + zone_type, + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug(response.int_info) + return self.get(zone_name) + + def remove(self, zone_name): + message_body = GeneralAdminRequest( + "rm", + "zone", + zone_name + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug(response.int_info) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 2bc7d31..35413ea 100644 --- a/irods/message/__init__.py +++ 
b/irods/message/__init__.py @@ -188,7 +188,7 @@ class ClientServerNegotiation(Message): class StartupPack(Message): _name = 'StartupPack_PI' - def __init__(self, proxy_user, client_user): + def __init__(self, proxy_user, client_user, application_name = ''): super(StartupPack, self).__init__() if proxy_user and client_user: self.irodsProt = 1 @@ -197,7 +197,7 @@ def __init__(self, proxy_user, client_user): self.clientUser, self.clientRcatZone = client_user self.relVersion = "rods{}.{}.{}".format(*IRODS_VERSION) self.apiVersion = "{3}".format(*IRODS_VERSION) - self.option = "" + self.option = application_name irodsProt = IntegerProperty() reconnFlag = IntegerProperty() @@ -223,6 +223,12 @@ class AuthChallenge(Message): _name = 'authRequestOut_PI' challenge = BinaryProperty(64) + +class AuthPluginOut(Message): + _name = 'authPlugReqOut_PI' + result_ = StringProperty() + # result_ = BinaryProperty(16) + # define InxIvalPair_PI "int iiLen; int *inx(iiLen); int *ivalue(iiLen);" @@ -232,7 +238,7 @@ class BinBytesBuf(Message): buf = BinaryProperty() -class GSIAuthMessage(Message): +class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' auth_scheme_ = StringProperty() context_ = StringProperty() @@ -259,6 +265,21 @@ def __init__(self, list_of_keyval_tuples ): self._keys.append(k) self._values.append(v) +class _OrderedMultiMapping : + def keys(self): + return self._keys + def values(self): + return self._values + def __len__(self): + return len(self._keys) + def __init__(self, list_of_keyval_tuples ): + self._keys = [] + self._values = [] + for k,v in list_of_keyval_tuples: + self._keys.append(k) + self._values.append(v) + + class IntegerIntegerMap(Message): _name = 'InxIvalPair_PI' diff --git a/irods/message/property.py b/irods/message/property.py index 443d371..b4c250e 100644 --- a/irods/message/property.py +++ b/irods/message/property.py @@ -3,7 +3,10 @@ from irods.message.ordered import OrderedProperty import six - +if six.PY3: + from html import escape +else: + from cgi import escape class MessageProperty(OrderedProperty): @@ -82,24 +85,27 @@ def __init__(self, length=None): self.length = length super(StringProperty, self).__init__() + @staticmethod + def escape_xml_string(string): + return escape(string, quote=False) if six.PY2: def format(self, value): if isinstance(value, str) or isinstance(value, unicode): - return value + return self.escape_xml_string(value) - return str(value) + return self.escape_xml_string(str(value)) else: # Python 3 def format(self, value): if isinstance(value, str): - return value + return self.escape_xml_string(value) if isinstance(value, bytes): - return value.decode() + return self.escape_xml_string(value.decode()) - return str(value) + return self.escape_xml_string(str(value)) def parse(self, value): diff --git a/irods/models.py b/irods/models.py index 08c53e7..4b1b4e2 100644 --- a/irods/models.py +++ b/irods/models.py @@ -22,6 +22,7 @@ class Model(six.with_metaclass(ModelBase, object)): class Zone(Model): id = Column(Integer, 'ZONE_ID', 101) name = Column(String, 'ZONE_NAME', 102) + type = Column(String, 'ZONE_TYPE', 103) class User(Model): @@ -112,6 +113,8 @@ class DataObjectMeta(Model): name = Column(String, 'COL_META_DATA_ATTR_NAME', 600) value = Column(String, 'COL_META_DATA_ATTR_VALUE', 601) units = Column(String, 'COL_META_DATA_ATTR_UNITS', 602) + create_time = Column(DateTime, 'COL_META_DATA_CREATE_TIME', 604) + modify_time = Column(DateTime, 'COL_META_DATA_MODIFY_TIME', 605) class CollectionMeta(Model): @@ -119,6 +122,9 @@ class 
CollectionMeta(Model): name = Column(String, 'COL_META_COLL_ATTR_NAME', 610) value = Column(String, 'COL_META_COLL_ATTR_VALUE', 611) units = Column(String, 'COL_META_COLL_ATTR_UNITS', 612) + create_time = Column(DateTime, 'COL_META_COLL_CREATE_TIME', 614) + modify_time = Column(DateTime, 'COL_META_COLL_MODIFY_TIME', 615) + class ResourceMeta(Model): @@ -126,6 +132,9 @@ class ResourceMeta(Model): name = Column(String, 'COL_META_RESC_ATTR_NAME', 630) value = Column(String, 'COL_META_RESC_ATTR_VALUE', 631) units = Column(String, 'COL_META_RESC_ATTR_UNITS', 632) + create_time = Column(DateTime, 'COL_META_RESC_CREATE_TIME', 634) + modify_time = Column(DateTime, 'COL_META_RESC_MODIFY_TIME', 635) + class UserMeta(Model): @@ -133,6 +142,9 @@ class UserMeta(Model): name = Column(String, 'COL_META_USER_ATTR_NAME', 640) value = Column(String, 'COL_META_USER_ATTR_VALUE', 641) units = Column(String, 'COL_META_USER_ATTR_UNITS', 642) + create_time = Column(DateTime, 'COL_META_USER_CREATE_TIME', 644) + modify_time = Column(DateTime, 'COL_META_USER_MODIFY_TIME', 645) + class DataAccess(Model): diff --git a/irods/pool.py b/irods/pool.py index b6c0257..86547dc 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -1,6 +1,8 @@ from __future__ import absolute_import +import datetime import logging import threading +import os from irods import DEFAULT_CONNECTION_TIMEOUT from irods.connection import Connection @@ -8,32 +10,73 @@ logger = logging.getLogger(__name__) +DEFAULT_APPLICATION_NAME = 'python-irodsclient' + + class Pool(object): currentAuth=None - def __init__(self, account, block_on_authURL=True): + + def __init__(self, account, application_name='', connection_refresh_time=-1, block_on_authURL=True): + ''' + Pool( account , application_name='' ) + Create an iRODS connection pool; 'account' is an irods.account.iRODSAccount instance and + 'application_name' specifies the application name as it should appear in an 'ips' listing. + ''' self.account = account self.block_on_authURL=block_on_authURL self._lock = threading.RLock() self.active = set() self.idle = set() self.connection_timeout = DEFAULT_CONNECTION_TIMEOUT + self.application_name = ( os.environ.get('spOption','') or + application_name or + DEFAULT_APPLICATION_NAME ) + + if connection_refresh_time > 0: + self.refresh_connection = True + self.connection_refresh_time = connection_refresh_time + else: + self.refresh_connection = False + self.connection_refresh_time = None def get_connection(self): with self._lock: try: conn = self.idle.pop() + + curr_time = datetime.datetime.now() + # If 'refresh_connection' flag is True and the connection was + # created more than 'connection_refresh_time' seconds ago, + # release the connection (as its stale) and create a new one + if self.refresh_connection and (curr_time - conn.create_time).total_seconds() > self.connection_refresh_time: + logger.debug('Connection with id {} was created more than {} seconds ago. Releasing the connection and creating a new one.'.format(id(conn), self.connection_refresh_time)) + self.release_connection(conn, True) + conn = Connection(self, self.account) + logger.debug("Created new connection with id: {}".format(id(conn))) except KeyError: conn = Connection(self, self.account, block_on_authURL=self.block_on_authURL) + logger.debug("No connection found in idle set. 
Created a new connection with id: {}".format(id(conn))) + self.active.add(conn) + logger.debug("Adding connection with id {} to active set".format(id(conn))) + logger.debug('num active: {}'.format(len(self.active))) + logger.debug('num idle: {}'.format(len(self.idle))) return conn def release_connection(self, conn, destroy=False): with self._lock: if conn in self.active: self.active.remove(conn) + logger.debug("Removed connection with id: {} from active set".format(id(conn))) if not destroy: + # If 'refresh_connection' flag is True, update connection's 'last_used_time' + if self.refresh_connection: + conn.last_used_time = datetime.datetime.now() self.idle.add(conn) + logger.debug("Added connection with id: {} to idle set".format(id(conn))) elif conn in self.idle and destroy: + logger.debug("Destroyed connection with id: {}".format(id(conn))) self.idle.remove(conn) + logger.debug('num active: {}'.format(len(self.active))) logger.debug('num idle: {}'.format(len(self.idle))) diff --git a/irods/query.py b/irods/query.py index 2006711..0d9f7f4 100644 --- a/irods/query.py +++ b/irods/query.py @@ -70,7 +70,7 @@ def filter(self, *criteria): def order_by(self, column, order='asc'): new_q = self._clone() - del new_q.columns[column] + new_q.columns.pop(column,None) if order == 'asc': new_q.columns[column] = query_number['ORDER_BY'] elif order == 'desc': @@ -193,15 +193,20 @@ def all(self): def get_batches(self): result_set = self.execute() - yield result_set - while result_set.continue_index > 0: - try: - result_set = self.continue_index( - result_set.continue_index).execute() - yield result_set - except CAT_NO_ROWS_FOUND: - break + try: + yield result_set + + while result_set.continue_index > 0: + try: + result_set = self.continue_index( + result_set.continue_index).execute() + yield result_set + except CAT_NO_ROWS_FOUND: + break + except GeneratorExit: + if result_set.continue_index > 0: + self.continue_index(result_set.continue_index).close() def get_results(self): for result_set in self.get_batches(): @@ -213,6 +218,8 @@ def __iter__(self): def one(self): results = self.execute() + if results.continue_index > 0: + self.continue_index(results.continue_index).close() if not len(results): raise NoResultFound() if len(results) > 1: @@ -222,6 +229,8 @@ def one(self): def first(self): query = self.limit(1) results = query.execute() + if results.continue_index > 0: + query.continue_index(results.continue_index).close() if not len(results): return None else: @@ -288,7 +297,7 @@ def execute(self, limit=MAX_SQL_ROWS, offset=0, options=0, conditions=None): conditions = StringStringMap({}) sql_args = {} - for i, arg in enumerate(self._args[:10]): + for i, arg in enumerate(self._args[:10], start=1): sql_args['arg{}'.format(i)] = arg message_body = SpecificQueryRequest(sql=target, diff --git a/irods/resource.py b/irods/resource.py index 7ddd368..c87a7a7 100644 --- a/irods/resource.py +++ b/irods/resource.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from irods.models import Resource +from irods.meta import iRODSMetaCollection import six @@ -37,6 +38,12 @@ def __init__(self, manager, result=None): self._meta = None + @property + def metadata(self): + if not self._meta: + self._meta = iRODSMetaCollection( + self.manager.sess.metadata, Resource, self.name) + return self._meta @property def context_fields(self): diff --git a/irods/rule.py b/irods/rule.py index c81800c..6d7e07a 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -1,39 +1,31 @@ from __future__ import absolute_import -import six from 
irods.message import iRODSMessage, StringStringMap, RodsHostAddress, STR_PI, MsParam, MsParamArray, RuleExecutionRequest from irods.api_number import api_number - -if six.PY3: - from html import escape -else: - from cgi import escape - -import logging - -logger = logging.getLogger(__name__) - +from io import open as io_open class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): self.session = session + self.params = {} + self.output = '' + if rule_file: self.load(rule_file) else: - self.body = '@external\n' + escape(body, quote=True) - if params is None: - self.params = {} - else: - self.params = params + self.body = '@external\n' + body + + # overwrite params and output if received arguments + if params is not None: + self.params = params + if output != '': self.output = output - def load(self, rule_file): - self.params = {} - self.output = '' + def load(self, rule_file, encoding = 'utf-8'): self.body = '@external\n' # parse rule file - with open(rule_file) as f: + with io_open(rule_file, encoding = encoding) as f: for line in f: # parse input line if line.strip().lower().startswith('input'): @@ -61,14 +53,14 @@ def load(self, rule_file): # parse rule else: - self.body += escape(line, quote=True) + self.body += line def execute(self): # rule input param_array = [] for label, value in self.params.items(): - inOutStruct = STR_PI(myStr=escape(value, quote=True)) + inOutStruct = STR_PI(myStr=value) param_array.append(MsParam(label=label, type='STR_PI', inOutStruct=inOutStruct)) inpParamArray = MsParamArray(paramLen=len(param_array), oprType=0, MsParam_PI=param_array) diff --git a/irods/session.py b/irods/session.py index 9be3723..9d7a943 100644 --- a/irods/session.py +++ b/irods/session.py @@ -1,6 +1,7 @@ from __future__ import absolute_import import os import json +import logging from irods.query import Query from irods.pool import Pool from irods.account import iRODSAccount @@ -10,8 +11,12 @@ from irods.manager.access_manager import AccessManager from irods.manager.user_manager import UserManager, UserGroupManager from irods.manager.resource_manager import ResourceManager +from irods.manager.zone_manager import ZoneManager from irods.exception import NetworkException from irods.password_obfuscation import decode +from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME + +logger = logging.getLogger(__name__) class iRODSSession(object): @@ -29,6 +34,7 @@ def __init__(self, configure=True, **kwargs): self.users = UserManager(self) self.user_groups = UserGroupManager(self) self.resources = ResourceManager(self) + self.zones = ZoneManager(self) def __enter__(self): return self @@ -73,7 +79,13 @@ def _configure_account(self, **kwargs): # default auth_scheme = 'native' - if auth_scheme != 'native': + if auth_scheme.lower() == PAM_AUTH_SCHEME: + if 'password' in creds: + return iRODSAccount(**creds) + else: + # password will be from irodsA file therefore use native login + creds['irods_authentication_scheme'] = NATIVE_AUTH_SCHEME + elif auth_scheme != 'native': return iRODSAccount(**creds) # Native auth, try to unscramble password @@ -86,10 +98,14 @@ def _configure_account(self, **kwargs): return iRODSAccount(**creds) - def configure(self, **kwargs): account = self._configure_account(**kwargs) - self.pool = Pool(account, kwargs.get ('block_on_authURL', True)) + connection_refresh_time = self.get_connection_refresh_time(**kwargs) + self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time, + 
block_on_authURL=kwargs.get ('block_on_authURL', True)) + logger.debug("In iRODSSession's configure(). connection_refresh_time set to {}".format(connection_refresh_time)) + self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time, + block_on_authURL=kwargs.get ('block_on_authURL', True)) def query(self, *args): return Query(self, *args) @@ -121,6 +137,15 @@ def server_version(self): conn.release() return version + @property + def pam_pw_negotiated(self): + self.pool.account.store_pw = [] + conn = self.pool.get_connection() + pw = getattr(self.pool.account,'store_pw',[]) + delattr( self.pool.account, 'store_pw') + conn.release() + return pw + @property def default_resource(self): return self.pool.account.default_resource @@ -146,8 +171,12 @@ def get_irods_password_file(): @staticmethod def get_irods_env(env_file): - with open(env_file, 'rt') as f: - return json.load(f) + try: + with open(env_file, 'rt') as f: + return json.load(f) + except IOError: + logger.debug("Could not open file {}".format(env_file)) + return {} @staticmethod def get_irods_password(**kwargs): @@ -163,3 +192,25 @@ def get_irods_password(**kwargs): with open(irods_auth_file, 'r') as f: return decode(f.read().rstrip('\n'), uid) + + def get_connection_refresh_time(self, **kwargs): + connection_refresh_time = -1 + + connection_refresh_time = int(kwargs.get('refresh_time', -1)) + if connection_refresh_time != -1: + return connection_refresh_time + + try: + env_file = kwargs['irods_env_file'] + except KeyError: + return connection_refresh_time + + if env_file is not None: + env_file_map = self.get_irods_env(env_file) + connection_refresh_time = int(env_file_map.get('irods_connection_refresh_time', -1)) + if connection_refresh_time < 1: + # Negative values are not allowed. + logger.debug('connection_refresh_time in {} file has value of {}. 
Only values greater than 1 are allowed.'.format(env_file, connection_refresh_time)) + connection_refresh_time = -1 + + return connection_refresh_time diff --git a/irods/test/access_test.py b/irods/test/access_test.py index 0d1c39f..10f76b4 100644 --- a/irods/test/access_test.py +++ b/irods/test/access_test.py @@ -4,7 +4,10 @@ import sys import unittest from irods.access import iRODSAccess +from irods.user import iRODSUser +from irods.models import User import irods.test.helpers as helpers +from irods.column import In class TestAccess(unittest.TestCase): @@ -22,6 +25,7 @@ def tearDown(self): self.coll.remove(recurse=True, force=True) self.sess.cleanup() + def test_list_acl(self): # test args collection = self.coll_path @@ -114,6 +118,48 @@ def test_set_collection_acl(self): acl1 = iRODSAccess('own', coll.path, user.name, user.zone) self.sess.permissions.set(acl1) + mapping = dict( [ (i,i) for i in ('modify object', 'read object', 'own') ] + + [ ('write','modify object') , ('read', 'read object') ] + ) + + @classmethod + def perms_lists_symm_diff ( cls, a_iter, b_iter ): + fields = lambda perm: (cls.mapping[perm.access_name], perm.user_name, perm.user_zone) + A = set (map(fields,a_iter)) + B = set (map(fields,b_iter)) + return (A-B) | (B-A) + + def test_raw_acls__207(self): + data = helpers.make_object(self.sess,"/".join((self.coll_path,"test_obj"))) + eg = eu = fg = fu = None + try: + eg = self.sess.user_groups.create ('egrp') + eu = self.sess.users.create ('edith','rodsuser') + eg.addmember(eu.name,eu.zone) + fg = self.sess.user_groups.create ('fgrp') + fu = self.sess.users.create ('frank','rodsuser') + fg.addmember(fu.name,fu.zone) + my_ownership = set([('own', self.sess.username, self.sess.zone)]) + #--collection-- + perms1data = [ iRODSAccess ('write',self.coll_path, eg.name, self.sess.zone), + iRODSAccess ('read', self.coll_path, fu.name, self.sess.zone) + ] + for perm in perms1data: self.sess.permissions.set ( perm ) + p1 = self.sess.permissions.get ( self.coll, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms1data, p1 ), my_ownership) + #--data object-- + perms2data = [ iRODSAccess ('write',data.path, fg.name, self.sess.zone), + iRODSAccess ('read', data.path, eu.name, self.sess.zone) + ] + for perm in perms2data: self.sess.permissions.set ( perm ) + p2 = self.sess.permissions.get ( data, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms2data, p2 ), my_ownership) + finally: + ids_for_delete = [ u.id for u in (fu,fg,eu,eg) if u is not None ] + for u in [ iRODSUser(self.sess.users,row) + for row in self.sess.query(User).filter(In(User.id, ids_for_delete)) ]: + u.remove() + if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 883acec..b341cb1 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -2,8 +2,6 @@ from __future__ import absolute_import import os import sys -import string -import random import unittest from irods.models import User from irods.exception import UserDoesNotExist, ResourceDoesNotExist @@ -332,8 +330,8 @@ def test_set_user_password(self): zone = self.sess.zone self.sess.users.create(self.new_user_name, self.new_user_type) - # make a 12 character pseudo-random password - new_password = ''.join(random.choice(string.ascii_letters + string.digits + string.punctuation) for _ in range(12)) + # make a really horrible password + new_password = '''abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' 
self.sess.users.modify(username, 'password', new_password) # open a session as the new user diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 424338e..d7ac1af 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -131,6 +131,37 @@ def test_move_obj_to_coll(self): # remove new collection new_coll.remove(recurse=True, force=True) + def test_copy_existing_obj_to_relative_dest_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will give nondescriptive error') + obj_name = 'this_object_will_exist_once_made' + exists_path = '{}/{}'.format(self.coll_path, obj_name) + helpers.make_object(self.sess, exists_path) + self.assertTrue(self.sess.data_objects.exists(exists_path)) + non_existing_zone = 'this_zone_absent' + relative_dst_path = '{non_existing_zone}/{obj_name}'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(exists_path, relative_dst_path, **options) + + def test_copy_from_nonexistent_absolute_data_obj_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + non_existing_zone = 'this_zone_absent' + src_path = '/{non_existing_zone}/non_existing.src'.format(**locals()) + dst_path = '/{non_existing_zone}/non_existing.dst'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) + + def test_copy_from_relative_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + src_path = 'non_existing.src' + dst_path = 'non_existing.dst' + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) def test_copy_obj_to_obj(self): # test args @@ -292,7 +323,7 @@ def test_create_with_checksum(self): obj_path = "{collection}/{filename}".format(**locals()) contents = 'blah' * 100 checksum = base64.b64encode( - hashlib.sha256(contents).digest()).decode() + hashlib.sha256(contents.encode()).digest()).decode() # make object in test collection options = {kw.OPR_TYPE_KW: 1} # PUT_OPR @@ -353,7 +384,8 @@ def test_put_file_trigger_pep(self): # make pseudo-random test file filename = 'test_put_file_trigger_pep.txt' test_file = os.path.join('/tmp', filename) - contents = ''.join(random.choice(string.printable) for _ in range(1024)) + contents = ''.join(random.choice(string.printable) for _ in range(1024)).encode() + contents = contents[:1024] with open(test_file, 'wb') as f: f.write(contents) @@ -606,6 +638,65 @@ def test_repave_replicas(self): for resource in ufs_resources: resource.remove() + def test_get_replica_size(self): + session = self.sess + + # Can't do one step open/create with older servers + if session.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_replica_size_test_file' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = 'ufs{}'.format(i) + resource_type = 'unixfilesystem' + resource_host = session.host + resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(session.resources.create( + 
resource_name, resource_type, resource_host, resource_path)) + + # put file in test collection and replicate + obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + session.data_objects.replicate(obj_path, ufs_resources[1].name) + + # make random 32byte binary file + new_size = 32 + with open(test_file, 'wb') as f: + f.write(os.urandom(new_size)) + + # overwrite existing replica 0 with new file + options = {kw.FORCE_FLAG_KW: '', kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + + # delete file + os.remove(test_file) + + # ensure that sizes of the replicas are distinct + obj = session.data_objects.get(obj_path, test_dir) + self.assertEqual(obj.replicas[0].size, new_size) + self.assertEqual(obj.replicas[1].size, original_size) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove ufs resources + for resource in ufs_resources: + resource.remove() def test_obj_put_get(self): # Can't do one step open/create with older servers @@ -766,6 +857,46 @@ def test_obj_put_to_default_resource_from_env_file(self): os.remove(new_env_file) + def test_obj_put_and_return_data_object(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # make another UFS resource + session = self.sess + resource_name = 'ufs' + resource_type = 'unixfilesystem' + resource_host = session.host + resource_path = '/tmp/' + resource_name + session.resources.create(resource_name, resource_type, resource_host, resource_path) + + # set default resource to new UFS resource + session.default_resource = resource_name + + # make a local file with random text content + content = ''.join(random.choice(string.printable) for _ in range(1024)) + filename = 'testfile.txt' + file_path = os.path.join('/tmp', filename) + with open(file_path, 'w') as f: + f.write(content) + + # put file + collection = self.coll_path + obj_path = '{collection}/{filename}'.format(**locals()) + + new_file = session.data_objects.put(file_path, obj_path, return_data_object=True) + + # get object and confirm resource + obj = session.data_objects.get(obj_path) + self.assertEqual(new_file.replicas[0].resource_name, obj.replicas[0].resource_name) + + # cleanup + os.remove(file_path) + obj.unlink(force=True) + session.resources.remove(resource_name) + + + def test_force_get(self): # Can't do one step open/create with older servers if self.sess.server_version <= (4, 1, 4): @@ -900,6 +1031,103 @@ def test_modDataObjMeta(self): # delete file os.remove(test_file) + def test_register_with_xml_special_chars(self): + # skip if server is remote + if self.sess.host not in ('localhost', socket.gethostname()): + self.skipTest('Requires access to server-side file(s)') + + # test vars + test_dir = '/tmp' + filename = '''aaa'"<&test&>"'_file''' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + self.sess.data_objects.register(test_file, obj_path) + + # confirm object presence + obj = self.sess.data_objects.get(obj_path) + + # in a real use case we would likely + # want to leave the physical file on disk + obj.unregister() + + # delete file 
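+        # (the object was registered in place rather than put, so iRODS holds no
+        # separate copy of the data; removing the local file completes the cleanup)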
+ os.remove(test_file) + + def test_get_data_objects(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_data_objects_test_file' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = 'ufs{}'.format(i) + resource_type = 'unixfilesystem' + resource_host = self.sess.host + resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(self.sess.resources.create( + resource_name, resource_type, resource_host, resource_path)) + + # make passthru resource and add ufs1 as a child + passthru_resource = self.sess.resources.create('pt', 'passthru') + self.sess.resources.add_child(passthru_resource.name, ufs_resources[1].name) + + # put file in test collection and replicate + obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + self.sess.data_objects.put(test_file, '{collection}/'.format(**locals()), **options) + self.sess.data_objects.replicate(obj_path, passthru_resource.name) + + # ensure that replica info is populated + obj = self.sess.data_objects.get(obj_path) + for i in ["number","status","resource_name","path","resc_hier"]: + self.assertIsNotNone(obj.replicas[0].__getattribute__(i)) + self.assertIsNotNone(obj.replicas[1].__getattribute__(i)) + + # ensure replica info is sensible + for i in range(2): + self.assertEqual(obj.replicas[i].number, i) + self.assertEqual(obj.replicas[i].status, '1') + self.assertEqual(obj.replicas[i].path.split('/')[-1], filename) + self.assertEqual(obj.replicas[i].resc_hier.split(';')[-1], ufs_resources[i].name) + + self.assertEqual(obj.replicas[0].resource_name, ufs_resources[0].name) + if self.sess.server_version < (4, 2, 0): + self.assertEqual(obj.replicas[i].resource_name, passthru_resource.name) + else: + self.assertEqual(obj.replicas[i].resource_name, ufs_resources[1].name) + self.assertEqual(obj.replicas[1].resc_hier.split(';')[0], passthru_resource.name) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove resources + self.sess.resources.remove_child(passthru_resource.name, ufs_resources[1].name) + passthru_resource.remove() + for resource in ufs_resources: + resource.remove() + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/helpers.py b/irods/test/helpers.py index 76d9204..a23fd1b 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -124,6 +124,17 @@ def compute_sha256_digest(file_path): return base64.b64encode(hasher.digest()).decode() +def remove_unused_metadata(session): + from irods.message import GeneralAdminRequest + from irods.api_number import api_number + message_body = GeneralAdminRequest( 'rm', 'unusedAVUs', '','','','') + req = iRODSMessage("RODS_API_REQ", msg = message_body,int_info=api_number['GENERAL_ADMIN_AN']) + with session.pool.get_connection() as conn: + conn.send(req) + response=conn.recv() + if (response.int_info != 0): raise RuntimeError("Error removing unused AVUs") + + @contextlib.contextmanager def file_backed_up(filename): with tempfile.NamedTemporaryFile(prefix=os.path.basename(filename)) as f: diff --git 
a/irods/test/login_auth_test.py b/irods/test/login_auth_test.py new file mode 100644 index 0000000..b92e8d4 --- /dev/null +++ b/irods/test/login_auth_test.py @@ -0,0 +1,332 @@ +#! /usr/bin/env python +from __future__ import print_function +from __future__ import absolute_import +import os +import sys +import unittest +import textwrap +import json +import shutil +import ssl +import irods.test.helpers as helpers +from irods.connection import Connection +from irods.session import iRODSSession +from irods.rule import Rule +from irods.models import User +from socket import gethostname +from irods.password_obfuscation import (encode as pw_encode) +from irods.connection import PlainTextPAMPasswordError +import contextlib +from re import compile as regex +try: + from re import _pattern_type as regex_type +except ImportError: + from re import Pattern as regex_type # Python 3.7+ + + +def json_file_update(fname,keys_to_delete=(),**kw): + j = json.load(open(fname,'r')) + j.update(**kw) + for k in keys_to_delete: + if k in j: del j [k] + elif isinstance(k,regex_type): + jk = [i for i in j.keys() if k.search(i)] + for ky in jk: del j[ky] + with open(fname,'w') as out: + json.dump(j, out, indent=4) + +def env_dir_fullpath(authtype): return os.path.join( os.environ['HOME'] , '.irods.' + authtype) +def json_env_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), 'irods_environment.json') +def secrets_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), '.irodsA') + +SERVER_ENV_PATH = os.path.expanduser('~irods/.irods/irods_environment.json') + +SERVER_ENV_SSL_SETTINGS = { + "irods_ssl_certificate_chain_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_certificate_key_file": "/etc/irods/ssl/irods.key", + "irods_ssl_dh_params_file": "/etc/irods/ssl/dhparams.pem", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert" +} + +def update_service_account_for_SSL(): + json_file_update( SERVER_ENV_PATH, **SERVER_ENV_SSL_SETTINGS ) + +CLIENT_OPTIONS_FOR_SSL = { + "irods_client_server_policy": "CS_NEG_REQUIRE", + "irods_client_server_negotiation": "request_server_negotiation", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert", + "irods_encryption_key_size": 16, + "irods_encryption_salt_size": 8, + "irods_encryption_num_hash_rounds": 16, + "irods_encryption_algorithm": "AES-256-CBC" +} + + +def client_env_from_server_env(user_name, auth_scheme=""): + cli_env = {} + with open(SERVER_ENV_PATH) as f: + srv_env = json.load(f) + for k in [ "irods_host", "irods_zone_name", "irods_port" ]: + cli_env [k] = srv_env[k] + cli_env["irods_user_name"] = user_name + if auth_scheme: + cli_env["irods_authentication_scheme"] = auth_scheme + return cli_env + +@contextlib.contextmanager +def pam_password_in_plaintext(allow=True): + saved = bool(Connection.DISALLOWING_PAM_PLAINTEXT) + try: + Connection.DISALLOWING_PAM_PLAINTEXT = not(allow) + yield + finally: + Connection.DISALLOWING_PAM_PLAINTEXT = saved + + +class TestLogins(unittest.TestCase): + ''' + This is due to be moved into Jenkins CI along core and other iRODS tests. + Until then, for these tests to run successfully, we require: + 1. First run ./setupssl.py (sets up SSL keys etc. in /etc/irods/ssl) + 2. Add & override configuration entries in /var/lib/irods/irods_environment + Per https://slides.com/irods/ugm2018-ssl-and-pam-configuration#/3/7 + 3. Create rodsuser alissa and corresponding unix user with the appropriate + passwords as below. 
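+       For example, step 3 could be done from a rodsadmin session (named
+       admin_sess here purely for illustration), matching the names and
+       passwords assumed in user_auth_envs below:
+           admin_sess.users.create('alissa', 'rodsuser')
+           admin_sess.users.modify('alissa', 'password', 'apass')
+       together with an OS-level user 'alissa' whose UNIX/PAM password is 'test123'.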
+ ''' + + test_rods_user = 'alissa' + + user_auth_envs = { + '.irods.pam': { + 'USER': test_rods_user, + 'PASSWORD': 'test123', # UNIX pw + 'AUTH': 'pam' + }, + '.irods.native': { + 'USER': test_rods_user, + 'PASSWORD': 'apass', # iRODS pw + 'AUTH': 'native' + } + } + + env_save = {} + + @contextlib.contextmanager + def setenv(self,var,newvalue): + try: + self.env_save[var] = os.environ.get(var,None) + os.environ[var] = newvalue + yield newvalue + finally: + oldvalue = self.env_save[var] + if oldvalue is None: + del os.environ[var] + else: + os.environ[var]=oldvalue + + @classmethod + def create_env_dirs(cls): + dirs = {} + retval = [] + # -- create environment configurations and secrets + with pam_password_in_plaintext(): + for dirname,lookup in cls.user_auth_envs.items(): + if lookup['AUTH'] == 'pam': + ses = iRODSSession( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + authentication_scheme=lookup['AUTH'], + password=lookup['PASSWORD'], + port= 1247 ) + try: + pam_hashes = ses.pam_pw_negotiated + except AttributeError: + pam_hashes = [] + if not pam_hashes: print('Warning ** PAM pw couldnt be generated' ); break + scrambled_pw = pw_encode( pam_hashes[0] ) + #elif lookup['AUTH'] == 'XXXXXX': # TODO: insert other authentication schemes here + elif lookup['AUTH'] in ('native', '',None): + scrambled_pw = pw_encode( lookup['PASSWORD'] ) + cl_env = client_env_from_server_env(cls.test_rods_user) + if lookup.get('AUTH',None) is not None: # - specify auth scheme only if given + cl_env['irods_authentication_scheme'] = lookup['AUTH'] + dirbase = os.path.join(os.environ['HOME'],dirname) + dirs[dirbase] = { 'secrets':scrambled_pw , 'client_environment':cl_env } + + # -- create the environment directories and write into them the configurations just created + for absdir in dirs.keys(): + shutil.rmtree(absdir,ignore_errors=True) + os.mkdir(absdir) + with open(os.path.join(absdir,'irods_environment.json'),'w') as envfile: + envfile.write('{}') + json_file_update(envfile.name, **dirs[absdir]['client_environment']) + with open(os.path.join(absdir,'.irodsA'),'wb') as secrets_file: + secrets_file.write(dirs[absdir]['secrets']) + os.chmod(secrets_file.name,0o600) + + retval = dirs.keys() + return retval + + + @staticmethod + def get_server_ssl_negotiation( session ): + + rule_body = textwrap.dedent(''' + test { *out=""; acPreConnect(*out); + writeLine("stdout", "*out"); + } + ''') + myrule = Rule(session, body=rule_body, params={}, output='ruleExecOut') + out_array = myrule.execute() + buf = out_array.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode('utf-8') + eol_offset = buf.find('\n') + return buf[:eol_offset] if eol_offset >= 0 else None + + @classmethod + def setUpClass(cls): + cls.admin = helpers.make_session() + if cls.test_rods_user in (row[User.name] for row in cls.admin.query(User.name)): + cls.server_ssl_setting = cls.get_server_ssl_negotiation( cls.admin ) + cls.envdirs = cls.create_env_dirs() + if not cls.envdirs: + raise RuntimeError('Could not create one or more client environments') + + @classmethod + def tearDownClass(cls): + for envdir in getattr(cls, 'envdirs', []): + shutil.rmtree(envdir, ignore_errors=True) + cls.admin.cleanup() + + def setUp(self): + if not getattr(self, 'envdirs', []): + self.skipTest('The test_rods_user "{}" does not exist'.format(self.test_rods_user)) + super(TestLogins,self).setUp() + + def tearDown(self): + super(TestLogins,self).tearDown() + + def validate_session(self, session, verbose=False, **options): + + # - try to get the home collection + 
home_coll = '/{0.zone}/home/{0.username}'.format(session) + self.assertTrue(session.collections.get(home_coll).path == home_coll) + if verbose: print(home_coll) + # - check user is as expected + self.assertEqual( session.username, self.test_rods_user ) + # - check socket type (normal vs SSL) against whether ssl requested + use_ssl = options.pop('ssl',None) + if use_ssl is not None: + my_connect = [s for s in (session.pool.active|session.pool.idle)] [0] + self.assertEqual( bool( use_ssl ), my_connect.socket.__class__ is ssl.SSLSocket ) + + +# def test_demo(self): self.demo() + +# def demo(self): # for future reference - skipping based on CS_NEG_DONT_CARE setting +# if self.server_ssl_setting == 'CS_NEG_DONT_CARE': +# self.skipTest('skipping b/c setting is DONT_CARE') +# self.assertTrue (False) + + + def tst0(self, ssl_opt, auth_opt, env_opt ): + auth_opt_explicit = 'native' if auth_opt=='' else auth_opt + verbosity=False + #verbosity='' # -- debug - sanity check by printing out options applied + out = {'':''} + if env_opt: + with self.setenv('IRODS_ENVIRONMENT_FILE', json_env_fullpath(auth_opt_explicit)) as env_file,\ + self.setenv('IRODS_AUTHENTICATION_FILE', secrets_fullpath(auth_opt_explicit)): + cli_env_extras = {} if not(ssl_opt) else dict( CLIENT_OPTIONS_FOR_SSL ) + if auth_opt: + cli_env_extras.update( irods_authentication_scheme = auth_opt ) + remove=[] + else: + remove=[regex('authentication_')] + with helpers.file_backed_up(env_file): + json_file_update( env_file, keys_to_delete=remove, **cli_env_extras ) + session = iRODSSession(irods_env_file=env_file) + out = json.load(open(env_file)) + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='no' + else: + session_options = {} + if auth_opt: + session_options.update (authentication_scheme = auth_opt) + if ssl_opt: + SSL_cert = CLIENT_OPTIONS_FOR_SSL["irods_ssl_ca_certificate_file"] + session_options.update( + ssl_context = ssl.create_default_context ( purpose = ssl.Purpose.SERVER_AUTH, + capath = None, + cadata = None, + cafile = SSL_cert), + **CLIENT_OPTIONS_FOR_SSL ) + lookup = self.user_auth_envs ['.irods.'+('native' if not(auth_opt) else auth_opt)] + session = iRODSSession ( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + password=lookup['PASSWORD'], + port= 1247, + **session_options ) + out = session_options + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='yes' + + if verbosity == '': + print ('--- ssl:',ssl_opt,'/ auth:',repr(auth_opt),'/ env:',env_opt) + print ('--- > ',json.dumps({k:v for k,v in out.items() if k != 'ssl_context'},indent=4)) + print ('---') + + # == test defaulting to 'native' + + def test_01(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = False ) + def test_02(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = False ) + def test_03(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = True ) + def test_04(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = True ) + + # == test explicit scheme 'native' + + def test_1(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = False ) + + def test_2(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = False ) + + def test_3(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = True ) + + def test_4(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = True ) + + # == test explicit scheme 'pam' + + def test_5(self): + self.tst0 ( 
ssl_opt = True, auth_opt = 'pam' , env_opt = False ) + + def test_6(self): + try: + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = False ) + except PlainTextPAMPasswordError: + pass + else: + # -- no exception raised + self.fail("PlainTextPAMPasswordError should have been raised") + + def test_7(self): + self.tst0 ( ssl_opt = True , auth_opt = 'pam' , env_opt = True ) + + def test_8(self): + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = True ) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 49fd24f..f6a13b0 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -5,7 +5,7 @@ import sys import unittest from irods.meta import iRODSMeta -from irods.models import DataObject, Collection +from irods.models import DataObject, Collection, Resource import irods.test.helpers as helpers from six.moves import range @@ -44,6 +44,19 @@ def test_get_obj_meta(self): # there should be no metadata at this point assert len(meta) == 0 + def test_resc_meta(self): + rescname = 'demoResc' + self.sess.resources.get(rescname).metadata.remove_all() + self.sess.metadata.set(Resource, rescname, iRODSMeta('zero','marginal','cost')) + self.sess.metadata.add(Resource, rescname, iRODSMeta('zero','marginal')) + self.sess.metadata.set(Resource, rescname, iRODSMeta('for','ever','after')) + meta = self.sess.resources.get(rescname).metadata + self.assertTrue( len(meta) == 3 ) + resource = self.sess.resources.get(rescname) + all_AVUs= resource.metadata.items() + for avu in all_AVUs: + resource.metadata.remove(avu) + self.assertTrue(0 == len(self.sess.resources.get(rescname).metadata)) def test_add_obj_meta(self): # add metadata to test object diff --git a/irods/test/pool_test.py b/irods/test/pool_test.py index 0f38ff8..0ecaa66 100644 --- a/irods/test/pool_test.py +++ b/irods/test/pool_test.py @@ -1,15 +1,18 @@ #! /usr/bin/env python from __future__ import absolute_import +import datetime import os import sys +import time import unittest import irods.test.helpers as helpers + class TestPool(unittest.TestCase): def setUp(self): - self.sess = helpers.make_session() + self.sess = helpers.make_session(irods_env_file="./test-data/irods_environment.json") def tearDown(self): '''Close connections @@ -17,7 +20,7 @@ def tearDown(self): self.sess.cleanup() def test_release_connection(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -34,7 +37,7 @@ def test_destroy_active(self): self.assertEqual(0, len(self.sess.pool.idle)) def test_destroy_idle(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -58,6 +61,168 @@ def test_release_disconnected(self): self.assertEqual(0, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) + def test_connection_create_time(self): + # Get a connection and record its object ID and create_time + # Release the connection (goes from active to idle queue) + # Again, get a connection. Should get the same connection back. + # I.e., the object IDs should match. 
However, the new connection + # should have a more recent 'last_used_time' + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertTrue(last_used_time_2 >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. This means any connection + # that was created more than 3 seconds ago will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait more than 'irods_connection_refresh_time' seconds, + # which is set to 3. Connection object should have a different + # object ID (as a new connection is created) + time.sleep(5) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertNotEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(create_time_2 > create_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_no_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. 
This means any connection + # created more than 3 seconds ago will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait less than 'irods_connection_refresh_time' seconds, + # which is set to 3. Connection object should have the same + # object ID (as idle time is less than 'irods_connection_refresh_time') + time.sleep(1) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(create_time_2 >= create_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_get_connection_refresh_time_no_env_file_input_param(self): + connection_refresh_time = self.sess.get_connection_refresh_time(first_name="Magic", last_name="Johnson") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_none_existant_env_file(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_non_existant.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_no_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_no_refresh_field.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_negative_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment_negative_refresh_field.json") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time(self): + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file="./test-data/irods_environment.json") + self.assertEqual(connection_refresh_time, 3) if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/test/query_test.py b/irods/test/query_test.py index d9a5f98..8bba108 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -1,19 +1,40 @@ #! 
/usr/bin/env python +# -*- coding: utf-8 -*- from __future__ import absolute_import import os +import six import sys +import tempfile import unittest +import time +import uuid from datetime import datetime -from irods.models import User, Collection, DataObject, Resource +from irods.models import (User, UserMeta, + Resource, ResourceMeta, + Collection, CollectionMeta, + DataObject, DataObjectMeta ) + from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery -from irods.column import Like, Between +from irods.column import Like, Between, In +from irods.meta import iRODSMeta from irods import MAX_SQL_ROWS import irods.test.helpers as helpers +from six.moves import range as py3_range + +IRODS_STATEMENT_TABLE_SIZE = 50 + +def rows_returned(query): + return len( list(query) ) class TestQuery(unittest.TestCase): + Iterate_to_exhaust_statement_table = range(IRODS_STATEMENT_TABLE_SIZE + 1) + + More_than_one_batch = 2*MAX_SQL_ROWS # may need to increase if PRC default page + # size is increased beyond 500 + def setUp(self): self.sess = helpers.make_session() @@ -26,14 +47,12 @@ def setUp(self): self.coll = self.sess.collections.create(self.coll_path) self.obj = self.sess.data_objects.create(self.obj_path) - def tearDown(self): '''Remove test data and close connections ''' self.coll.remove(recurse=True, force=True) self.sess.cleanup() - def test_collections_query(self): # collection query test result = self.sess.query(Collection.id, Collection.name).all() @@ -145,6 +164,20 @@ def test_query_order_by_invalid_param(self): results = self.sess.query(User.name).order_by( User.name, order='moo').all() + def test_query_order_by_col_not_in_result__183(self): + test_collection_size = 8 + test_collection_path = '/{0}/home/{1}/testcoln_for_col_not_in_result'.format(self.sess.zone, self.sess.username) + c1 = c2 = None + try: + c1 = helpers.make_test_collection( self.sess, test_collection_path+"1", obj_count=test_collection_size) + c2 = helpers.make_test_collection( self.sess, test_collection_path+"2", obj_count=test_collection_size) + d12 = [ sorted([d.id for d in c.data_objects]) for c in sorted((c1,c2),key=lambda c:c.id) ] + query = self.sess.query(DataObject).filter(Like(Collection.name, test_collection_path+"_")).order_by(Collection.id) + q12 = list(map(lambda res:res[DataObject.id], query)) + self.assertTrue(d12[0] + d12[1] == sorted( q12[:test_collection_size] ) + sorted( q12[test_collection_size:])) + finally: + if c1: c1.remove(recurse=True,force=True) + if c2: c2.remove(recurse=True,force=True) def test_query_with_like_condition(self): '''Equivalent to: @@ -154,7 +187,6 @@ def test_query_with_like_condition(self): query = self.sess.query(Resource).filter(Like(Resource.name, 'dem%')) self.assertIn('demoResc', [row[Resource.name] for row in query]) - def test_query_with_between_condition(self): '''Equivalent to: iquest "select RESC_NAME, COLL_NAME, DATA_NAME where DATA_MODIFY_TIME between '01451606400' '...'" @@ -171,6 +203,272 @@ def test_query_with_between_condition(self): res_str = '{} {}/{}'.format(result[Resource.name], result[Collection.name], result[DataObject.name]) self.assertIn(session.zone, res_str) + def test_query_with_in_condition(self): + collection = self.coll_path + filename = 'test_query_id_in_list.txt' + file_path = '{collection}/{filename}'.format(**locals()) + obj1 = helpers.make_object(self.sess, file_path+'-1') + obj2 = helpers.make_object(self.sess, file_path+'-2') + ids = [x.id for x in (obj1,obj2)] + 
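+        # In() renders a GenQuery "in (...)" condition, e.g.
+        #     self.sess.query(DataObject.name).filter(In(DataObject.id, ids))
+        # The loop below feeds it 0, 1 or 2 matching ids (a dummy id of 0 stands in
+        # for the empty case) and checks the returned row count each time.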
for number in range(3): # slice for empty(:0), first(:1) or both(:2) + search_tuple = (ids[:number] if number >= 1 else [0] + ids[:number]) + q = self.sess.query(DataObject.name).filter(In( DataObject.id, search_tuple )) + self.assertEqual (number, rows_returned(q)) + + def test_simultaneous_multiple_AVU_joins(self): + objects = [] + decoys = [] + try: + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + for x in range(3,9): + obj = helpers.make_object(self.sess, file_path+'-{}'.format(x)) # with metadata + objects.append(obj) + obj.metadata.add('A_meta','1{}'.format(x)) + obj.metadata.add('B_meta','2{}'.format(x)) + decoys.append(helpers.make_object(self.sess, file_path+'-dummy{}'.format(x))) # without metadata + self.assertTrue( len(objects) > 0 ) + + # -- test simple repeat of same column -- + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'A_meta', DataObjectMeta.value < '20').\ + filter(DataObjectMeta.name == 'B_meta', DataObjectMeta.value >= '20') + self.assertTrue( rows_returned(q) == len(objects) ) + + # -- test no-stomp of previous filter -- + self.assertTrue( ('B_meta','28') in [ (x.name,x.value) for x in objects[-1].metadata.items() ] ) + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value < '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(Like(DataObjectMeta.value, '2_')) + self.assertTrue( rows_returned(q) == len(objects)-1 ) + + # -- test multiple AVU's by same attribute name -- + objects[-1].metadata.add('B_meta','29') + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '29') + self.assertTrue(rows_returned(q) == 1) + finally: + for x in (objects + decoys): + x.unlink(force=True) + helpers.remove_unused_metadata( self.sess ) + + def test_query_on_AVU_times(self): + test_collection_path = '/{zone}/home/{user}/test_collection'.format( zone = self.sess.zone, user = self.sess.username) + testColl = helpers.make_test_collection(self.sess, test_collection_path, obj_count = 1) + testData = testColl.data_objects[0] + testResc = self.sess.resources.get('demoResc') + testUser = self.sess.users.get(self.sess.username) + objects = { 'r': testResc, 'u': testUser, 'c':testColl, 'd':testData } + object_IDs = { sfx:obj.id for sfx,obj in objects.items() } + tables = { 'r': (Resource, ResourceMeta), + 'u': (User, UserMeta), + 'd': (DataObject, DataObjectMeta), + 'c': (Collection, CollectionMeta) } + try: + str_number_incr = lambda str_numbers : str(1+max([0]+[int(n) if n.isdigit() else 0 for n in str_numbers])) + AVU_unique_incr = lambda obj,suffix='' : ( 'a_'+suffix, + 'v_'+suffix, + str_number_incr(avu.units for avu in obj.metadata.items()) ) + before = datetime.utcnow() + time.sleep(1.5) + for suffix,obj in objects.items(): obj.metadata.add( *AVU_unique_incr(obj,suffix) ) + after = datetime.utcnow() + for suffix, tblpair in tables.items(): + self.sess.query( *tblpair ).filter(tblpair[1].modify_time <= after )\ + .filter(tblpair[1].modify_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + self.sess.query( *tblpair ).filter(tblpair[1].create_time <= after )\ + .filter(tblpair[1].create_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + finally: + for obj in objects.values(): + for avu in obj.metadata.items(): 
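+                    # remove the AVUs from each object, then let
+                    # remove_unused_metadata() purge the orphaned entries below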
obj.metadata.remove(avu) + testColl.remove(recurse=True,force=True) + helpers.remove_unused_metadata( self.sess ) + + + def test_multiple_criteria_on_one_column_name(self): + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + objects = [] + nobj = 0 + for x in range(3,9): + nobj += 2 + obj1 = helpers.make_object(self.sess, file_path+'-{}'.format(x)) + obj2 = helpers.make_object(self.sess, file_path+'-dummy{}'.format(x)) + objects.extend([obj1,obj2]) + self.assertTrue( nobj > 0 and len(objects) == nobj ) + q = self.sess.query(Collection,DataObject) + dummy_test = [d for d in q if d[DataObject.name][-1:] != '8' + and d[DataObject.name][-7:-1] == '-dummy' ] + self.assertTrue( len(dummy_test) > 0 ) + q = q. filter(Like(DataObject.name, '%-dummy_')).\ + filter(Collection.name == collection) .\ + filter(DataObject.name != (filename + '-dummy8')) + results = [r[DataObject.name] for r in q] + self.assertTrue(len(results) == len(dummy_test)) + + @unittest.skipIf(six.PY3, 'Test is for python2 only') + def test_query_for_data_object_with_utf8_name_python2(self): + filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + self.assertEqual(self.FILENAME_PREFIX.encode('utf-8'), filename_prefix) + _,test_file = tempfile.mkstemp(prefix=filename_prefix) + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + try: + self.sess.data_objects.register(test_file, obj_path) + results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path.decode('utf8')) + self.assertEqual(result_physical_path, test_file.decode('utf8')) + finally: + self.sess.data_objects.unregister(obj_path) + os.remove(test_file) + + # view/change this line in text editors under own risk: + FILENAME_PREFIX = u'_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + + @unittest.skipIf(six.PY2, 'Test is for python3 only') + def test_query_for_data_object_with_utf8_name_python3(self): + def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): + file_path = os.path.join ((dir or os.environ.get('TMPDIR') or '/tmp'), prefix+'-'+str(uuid.uuid1())) + encoded_file_path = file_path.encode('utf-8') + return os.open(encoded_file_path,os.O_CREAT|os.O_RDWR,mode=open_mode), encoded_file_path + fd = None + filename_prefix = u'_prefix_'\ + u'\u01e0\u01e1\u01e2\u01e3\u01e4\u01e5\u01e6\u01e7\u01e8\u01e9\u01ea\u01eb\u01ec\u01ed\u01ee\u01ef'\ + u'\u01f0\u01f1\u01f2\u01f3\u01f4\u01f5\u01f6\u01f7\u01f8' # make more visible/changeable in VIM + self.assertEqual(self.FILENAME_PREFIX, filename_prefix) + (fd,encoded_test_file) = tempfile.mkstemp(prefix=filename_prefix.encode('utf-8')) \ + if sys.version_info >= (3,5) \ + else python34_unicode_mkstemp(prefix = filename_prefix) + self.assertTrue(os.path.exists(encoded_test_file)) + test_file = encoded_test_file.decode('utf-8') + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + try: + self.sess.data_objects.register(test_file, obj_path) + results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path) + self.assertEqual(result_physical_path, test_file) + finally: + 
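+            # unregister (rather than unlink) leaves the physical temp file in
+            # place; the open descriptor and the file itself are cleaned up below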
self.sess.data_objects.unregister(obj_path) + if fd is not None: os.close(fd) + os.remove(encoded_test_file) + + class Issue_166_context: + ''' + For [irods/python-irodsclient#166] related tests + ''' + + def __init__(self, session, coll_path='test_collection_issue_166', num_objects=8, num_avus_per_object=0): + self.session = session + if '/' not in coll_path: + coll_path = '/{}/home/{}/{}'.format(self.session.zone, self.session.username, coll_path) + self.coll_path = coll_path + self.num_objects = num_objects + self.test_collection = None + self.nAVUs = num_avus_per_object + + def __enter__(self): # - prepare for context block ("with" statement) + + self.test_collection = helpers.make_test_collection( self.session, self.coll_path, obj_count=self.num_objects) + q_params = (Collection.name, DataObject.name) + + if self.nAVUs > 0: + + # - set the AVUs on the collection's objects: + for data_obj_path in map(lambda d:d[Collection.name]+"/"+d[DataObject.name], + self.session.query(*q_params).filter(Collection.name == self.test_collection.path)): + data_obj = self.session.data_objects.get(data_obj_path) + for key in (str(x) for x in py3_range(self.nAVUs)): + data_obj.metadata[key] = iRODSMeta(key, "1") + + # - in subsequent test searches, match on each AVU of every data object in the collection: + q_params += (DataObjectMeta.name,) + + # - The "with" statement receives, as context variable, a zero-arg function to build the query + return lambda : self.session.query( *q_params ).filter( Collection.name == self.test_collection.path) + + def __exit__(self,*_): # - clean up after context block + + if self.test_collection is not None: + self.test_collection.remove(recurse=True, force=True) + + if self.nAVUs > 0 and self.num_objects > 0: + helpers.remove_unused_metadata(self.session) # delete unused AVU's + + def test_query_first__166(self): + + with self.Issue_166_context(self.sess) as buildQuery: + for dummy_i in self.Iterate_to_exhaust_statement_table: + buildQuery().first() + + def test_query_one__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + query = buildQuery() + try: + query.one() + except MultipleResultsFound: + pass # irrelevant result + + def test_query_one_iter__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + + for dummy_row in buildQuery(): + break # single iteration + + def test_paging_get_batches_and_check_paging__166(self): + + with self.Issue_166_context( self.sess, num_objects = 1, + num_avus_per_object = 2 * self.More_than_one_batch) as buildQuery: + + pages = [b for b in buildQuery().get_batches()] + self.assertTrue(len(pages) > 2 and len(pages[0]) < self.More_than_one_batch) + + to_compare = [] + + for _ in self.Iterate_to_exhaust_statement_table: + + for batch in buildQuery().get_batches(): + to_compare.append(batch) + if len(to_compare) == 2: break #leave query unfinished, but save two pages to compare + + # - To make sure paging was done, we ensure that this "key" tuple (collName/dataName , metadataKey) + # is not repeated between first two pages: + + Compare_Key = lambda d: ( d[Collection.name] + "/" + d[DataObject.name], + d[DataObjectMeta.name] ) + Set0 = { Compare_Key(dct) for dct in to_compare[0] } + Set1 = { Compare_Key(dct) for dct in to_compare[1] } + self.assertTrue(len(Set0 & Set1) == 0) # assert intersection is null set + + 
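+    # For reference, the paging interfaces exercised by these __166 tests look
+    # roughly like this (sketch only; the choice of columns is arbitrary):
+    #
+    #     query = session.query(Collection.name, DataObject.name)
+    #     for batch in query.get_batches():    # one list of row-dicts per server page
+    #         first_row = batch[0]
+    #     for row in query.get_results():      # row-by-row iteration across pages
+    #         name = row[DataObject.name]
+    #
+    # Iterating more times than IRODS_STATEMENT_TABLE_SIZE while leaving each query
+    # unfinished checks that server-side statement handles are freed, not exhausted.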
def test_paging_get_results__166(self): + + with self.Issue_166_context( self.sess, num_objects = self.More_than_one_batch) as buildQuery: + batch_size = 0 + for result_set in buildQuery().get_batches(): + batch_size = len(result_set) + break + + self.assertTrue(0 < batch_size < self.More_than_one_batch) + + for dummy_iter in self.Iterate_to_exhaust_statement_table: + iters = 0 + for dummy_row in buildQuery().get_results(): + iters += 1 + if iters == batch_size - 1: + break # leave iteration unfinished class TestSpecificQuery(unittest.TestCase): @@ -192,7 +490,7 @@ def test_query_data_name_and_id(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' columns = [DataObject.name, DataObject.id] query = SpecificQuery(self.session, sql, alias, columns) @@ -225,7 +523,7 @@ def test_query_data_name_and_id_no_columns(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' query = SpecificQuery(self.session, sql, alias) @@ -246,7 +544,7 @@ def test_query_data_name_and_id_no_columns(self): def test_register_query_twice(self): - query = SpecificQuery(self.session, sql='select data_name from r_data_main', alias='list_data_names') + query = SpecificQuery(self.session, sql='select DATA_NAME from R_DATA_MAIN', alias='list_data_names') # register query query.register() @@ -261,7 +559,6 @@ def test_register_query_twice(self): # remove query query.remove() - def test_list_specific_queries(self): query = SpecificQuery(self.session, alias='ls') @@ -270,7 +567,15 @@ def test_list_specific_queries(self): self.assertIn('SELECT', result[1].upper()) # query string - def test_list_specific_queries_with_wrong_alias(self): + def test_list_specific_queries_with_arguments(self): + query = SpecificQuery(self.session, alias='lsl', args=['%OFFSET%']) + + for result in query: + self.assertIsNotNone(result[0]) # query alias + self.assertIn('SELECT', result[1].upper()) # query string + + + def test_list_specific_queries_with_unknown_alias(self): query = SpecificQuery(self.session, alias='foo') with self.assertRaises(CAT_UNKNOWN_SPECIFIC_QUERY): diff --git a/irods/test/rule_test.py b/irods/test/rule_test.py index dfcc624..233cdd4 100644 --- a/irods/test/rule_test.py +++ b/irods/test/rule_test.py @@ -10,6 +10,7 @@ import irods.test.helpers as helpers from irods.rule import Rule import six +from io import open as io_open class TestRule(unittest.TestCase): @@ -157,11 +158,8 @@ def test_retrieve_std_streams_from_rule(self): INPUT *some_string="{some_string}",*some_other_string="{some_other_string}",*err_string="{err_string}" OUTPUT ruleExecOut'''.format(**locals())) - with open(rule_file_path, "w") as rule_file: - if six.PY2: - rule_file.write(rule.encode('utf-8')) - else: - rule_file.write(rule) + with io_open(rule_file_path, "w", encoding='utf-8') as rule_file: + rule_file.write(rule) # run test rule myrule = 
Rule(session, rule_file_path) diff --git a/irods/test/setupssl.py b/irods/test/setupssl.py new file mode 100755 index 0000000..aab6bd1 --- /dev/null +++ b/irods/test/setupssl.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +from __future__ import print_function +import os +import sys +import socket +import posix +import shutil +from subprocess import (Popen, PIPE) + +IRODS_SSL_DIR = '/etc/irods/ssl' + +def create_ssl_dir(): + save_cwd = os.getcwd() + silent_run = { 'shell': True, 'stderr' : PIPE, 'stdout' : PIPE } + try: + if not (os.path.exists(IRODS_SSL_DIR)): + os.mkdir(IRODS_SSL_DIR) + os.chdir(IRODS_SSL_DIR) + Popen("openssl genrsa -out irods.key 2048",**silent_run).communicate() + with open("/dev/null","wb") as dev_null: + p = Popen("openssl req -new -x509 -key irods.key -out irods.crt -days 365 <".format(**vars(self)) + diff --git a/setup.py b/setup.py index 159f46a..d4f54aa 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ author_email='support@irods.org', description='A python API for iRODS', long_description=long_description, + long_description_content_type='text/x-rst', license='BSD', url='https://github.com/irods/python-irodsclient', keywords='irods',
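For reference, the rule-from-file pattern exercised by rule_test.py above (write the rule text as UTF-8, then execute it) looks roughly as follows. This is a sketch only: the rule body, file path, and the already-open iRODSSession named session are illustrative, the rule syntax shown assumes the iRODS rule language, and under Python 2 the source file would also need a coding declaration for the non-ASCII literal.

    from io import open as io_open
    from irods.rule import Rule

    rule_text = u'''test_prc_rule {
        writeLine("stdout", "héllo from the rule engine");
    }
    INPUT null
    OUTPUT ruleExecOut'''

    rule_file_path = '/tmp/test_prc_rule.r'
    # write the rule file as UTF-8 so non-ASCII rule text survives on Python 2 and 3
    with io_open(rule_file_path, 'w', encoding='utf-8') as f:
        f.write(rule_text)

    myrule = Rule(session, rule_file_path)
    output = myrule.execute()
    # ruleExecOut carries the rule's stdout buffer
    print(output.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode('utf-8'))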