diff --git a/.gitignore b/.gitignore index 120928f..6a1275a 100644 --- a/.gitignore +++ b/.gitignore @@ -31,6 +31,7 @@ nosetests.xml .settings .project .pydevproject +.idea # Vim *.s[a-w][a-z] diff --git a/CHANGELOG.rst b/CHANGELOG.rst index bbf916a..f77687b 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -1,6 +1,93 @@ Changelog ========= +v1.0.0 (2021-06-03) +------------------- +[#274] calculate common vault dir for unicode query tests [Daniel Moore] +[#269] better session cleanup [Daniel Moore] + +v0.9.0 (2021-05-14) +------------------- +[#269] cleanup() is now automatic with session destruct [Daniel Moore] +[#235] multithreaded parallel transfer for PUT and GET [Daniel Moore] +[#232] do not arbitrarily pick first replica for DEST RESC [Daniel Moore] +[#233] add null handler for irods package root [Daniel Moore] +[#246] implementation of checksum for data object manager [Daniel Moore] +[#270] speed up tests [Daniel Moore] +[#260] [irods/irods#5520] XML protocol will use BinBytesBuf in 4.2.9 [Daniel Moore] +[#221] prepare test suite for CI [Daniel Moore] +[#267] add RuleExec model for genquery [Daniel Moore] +[#263] update documentation for connection_timeout [Terrell Russell] +[#261] add temporary password support [Paul van Schayck] +[#257] better SSL examples [Terrell Russell] +[#255] make results of atomic metadata operations visible [Daniel Moore] +[#250] add exception for SYS_INVALID_INPUT_PARAM [Daniel Moore] + +v0.8.6 (2021-01-22) +------------------- +[#244] added capability to add/remove atomic metadata [Daniel Moore] +[#226] Document creation of users [Ruben Garcia] +[#230] Add force option to data_object_manager create [Ruben Garcia] +[#239] to keep the tests passing [Daniel Moore] +[#239] add iRODSUser.info attribute [Pierre Gay] +[#239] add iRODSUser.comment attribute [Pierre Gay] +[#241] [irods/irods_capability_automated_ingest#136] fix redundant disconnect [Daniel Moore] +[#227] [#228] enable ICAT entries for zones and foreign-zone users [Daniel 
Moore] + +v0.8.5 (2020-11-10) +------------------- +[#220] Use connection create time to determine stale connections [Kaivan Kamali] + +v0.8.4 (2020-10-19) +------------------- +[#221] fix tests which were failing in Py3.4 and 3.7 [Daniel Moore] +[#220] Replace stale connections pulled from idle pools [Kaivan Kamali] +[#3] tests failing on Python3 unicode defaults [Daniel Moore] +[#214] store/load rules as utf-8 in files [Daniel Moore] +[#211] set and report application name to server [Daniel Moore] +[#156] skip ssh/pam login tests if user doesn't exist [Daniel Moore] +[#209] pam/ssl/env auth tests imported from test harness [Daniel Moore] +[#209] store hashed PAM pw [Daniel Moore] +[#205] Disallow PAM plaintext passwords as strong default [Daniel Moore] +[#156] fix the PAM authentication with env json file. [Patrice Linel] +[#207] add raw-acl permissions getter [Daniel Moore] + +v0.8.3 (2020-06-05) +------------------- +- [#3] remove order sensitivity in test_user_dn [Daniel Moore] +- [#5] clarify unlink specific replica example [Terrell Russell] +- [irods/irods#4796] add data object copy tests [Daniel Moore] +- [#5] Additional sections and examples in README [Daniel Moore] +- [#187] Allow query on metadata create and modify times [Daniel Moore] +- [#135] fix queries for multiple AVUs of same name [Daniel Moore] +- [#135] Allow multiple criteria based on column name [Daniel Moore] +- [#180] add the "in" genquery operator [Daniel Moore] +- [#183] fix key error when tables from order_by() not in query() [Daniel Moore] +- [#5] fix ssl example in README.rst [Terrell Russell] + +v0.8.2 (2019-11-13) +------------------- +- [#8] Add PAM Authentication handling (still needs tests) [Mattia D'Antonio] +- [#5] Remove commented-out import [Alan King] +- [#5] Add .idea directory to .gitignore [Jonathan Landrum] +- [#150] Fix specific query argument labeling [Chris Klimowski] +- [#148] DataObjectManager.put() can return the new data_object [Jonathan Landrum] +- [#124] Convert 
strings going to irods to Unicode [Alan King] +- [#161] Allow dynamic I/O for rule from file [Mathijs Koymans] +- [#162] Include resc_hier in replica information [Brett Hartley] +- [#165] Fix CAT_STATEMENT_TABLE_FULL by auto closing queries [Chris Smeele] +- [#166] Test freeing statements in unfinished query [Daniel Moore] +- [#167] Add metadata for user and usergroup objects [Erwin van Wieringen] +- [#175] Add metadata property for instances of iRODSResource [Daniel Moore] +- [#163] add keywords to query objects [Daniel Moore] + +v0.8.1 (2018-09-27) +------------------- +- [#140] Remove randomization from password test [Alan King] +- [#139] Use uppercase queries in tests [Alan King] +- [#137] Handle filenames with ampersands [Alan King] +- [#126] Add size attribute to iRODSReplica [Alan King] + v0.8.0 (2018-05-03) ------------------- - Add rescName and replNum awareness. [Hao Xu] diff --git a/Dockerfile.prc_test.centos b/Dockerfile.prc_test.centos new file mode 100644 index 0000000..4c8668e --- /dev/null +++ b/Dockerfile.prc_test.centos @@ -0,0 +1,29 @@ +ARG os_image +FROM ${os_image} +ARG log_output_dir=/tmp +ENV LOG_OUTPUT_DIR="$log_output_dir" +ARG py_N +ENV PY_N "$py_N" + +RUN yum install -y epel-release +RUN yum install -y git nmap-ncat sudo +RUN yum install -y python${py_N} python${py_N}-pip +RUN useradd -md /home/user -s /bin/bash user +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +WORKDIR /home/user +COPY ./ ./repo/ +RUN chown -R user repo/ +USER user +RUN pip${py_N} install --user --upgrade pip==20.3.4 # - limit pip version for C7 system python2.7 +RUN cd repo && python${py_N} -m pip install --user . +RUN python${py_N} repo/docker_build/iinit.py \ + host irods-provider \ + port 1247 \ + user rods \ + zone tempZone \ + password rods +SHELL ["/bin/bash","-c"] +CMD echo "Waiting on iRODS server... 
" ; \ + python${PY_N} repo/docker_build/recv_oneshot -h irods-provider -p 8888 -t 360 && \ + sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ + newgrp irods < repo/run_python_tests.sh diff --git a/Dockerfile.prc_test.ubuntu b/Dockerfile.prc_test.ubuntu new file mode 100644 index 0000000..3d42d5a --- /dev/null +++ b/Dockerfile.prc_test.ubuntu @@ -0,0 +1,36 @@ +ARG os_image +FROM ${os_image} +ARG log_output_dir=/tmp +ENV LOG_OUTPUT_DIR="$log_output_dir" +ARG py_N +ENV PY_N "$py_N" + +RUN apt update +RUN apt install -y git netcat-openbsd sudo +RUN apt install -y python${py_N} python${py_N}-pip +RUN useradd -md /home/user -s /bin/bash user +RUN echo "user ALL=(ALL) NOPASSWD: ALL" >> /etc/sudoers +WORKDIR /home/user +COPY ./ ./repo/ +RUN chown -R user repo/ +USER user +RUN pip${py_N} install --user --upgrade pip==20.3.4 # -- version specified for Ub16 +RUN cd repo && python${py_N} -m pip install --user . +RUN python${py_N} repo/docker_build/iinit.py \ + host irods-provider \ + port 1247 \ + user rods \ + zone tempZone \ + password rods + +SHELL ["/bin/bash","-c"] + +# -- At runtime: -- +# 1. wait for provider to run. +# 2. give user group permissions to access shared irods directories +# 3. run python tests as the new group + +CMD echo "Waiting on iRODS server... 
" ; \ + python${PY_N} repo/docker_build/recv_oneshot -h irods-provider -p 8888 -t 360 && \ + sudo groupadd -o -g $(stat -c%g /irods_shared) irods && sudo usermod -aG irods user && \ + newgrp irods < repo/run_python_tests.sh diff --git a/MANIFEST.in b/MANIFEST.in index 3c469e1..7d5f943 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1 +1 @@ -include AUTHORS CHANGELOG.rst LICENSE.txt README.rst irods/test/README.rst irods/test/unicode_sampler.xml \ No newline at end of file +include AUTHORS CHANGELOG.rst LICENSE.txt README.rst irods/test/README.rst irods/test/unicode_sampler.xml irods/test/test-data/*.json \ No newline at end of file diff --git a/README.rst b/README.rst index 4c2698c..116c390 100644 --- a/README.rst +++ b/README.rst @@ -2,46 +2,46 @@ Python iRODS Client (PRC) ========================= -`iRODS `_ is an open source distributed data management system. This is a client API implemented in python. +`iRODS `_ is an open source distributed data management system. This is a client API implemented in Python. 
Currently supported: -- Establish a connection to iRODS, authenticate -- Implement basic Gen Queries (select columns and filtering) -- Support more advanced Gen Queries with limits, offsets, and aggregations +- Python 2.7, 3.4 or newer +- Establish a connection to iRODS +- Authenticate via password, GSI, PAM +- iRODS connection over SSL +- Implement basic GenQueries (select columns and filtering) +- Support more advanced GenQueries with limits, offsets, and aggregations - Query the collections and data objects within a collection - Execute direct SQL queries - Execute iRODS rules - Support read, write, and seek operations for files -- PUT/GET data objects -- Create data objects -- Delete data objects +- Parallel PUT/GET data objects - Create collections +- Rename collections - Delete collections +- Create data objects - Rename data objects -- Rename collections +- Delete data objects - Register files and directories - Query metadata for collections and data objects - Add, edit, remove metadata - Replicate data objects to different resource servers - Connection pool management -- Implement gen query result sets as lazy queries +- Implement GenQuery result sets as lazy queries - Return empty result sets when CAT_NO_ROWS_FOUND is raised - Manage permissions - Manage users and groups - Manage resources -- GSI authentication - Unicode strings - Ticket based access -- iRODS connection over SSL -- Python 2.7, 3.4 or newer Installing ---------- PRC requires Python 2.7 or 3.4+. 
-To install with pip:: +Canonically, to install with pip:: pip install python-irodsclient @@ -49,7 +49,6 @@ or:: pip install git+https://github.com/irods/python-irodsclient.git[@branch|@commit|@tag] - Uninstalling ------------ @@ -57,21 +56,39 @@ Uninstalling pip uninstall python-irodsclient +Hazard: Outdated Python +-------------------------- +With older versions of Python (as of this writing, the aforementioned 2.7 and 3.4), we +can take preparatory steps toward securing workable versions of pip and virtualenv by +using these commands:: -Establishing a connection -------------------------- + $ pip install --upgrade --user pip'<21.0' + $ python -m pip install --user virtualenv + +We are then ready to use any of the following commands relevant to and required for the +installation:: -Using environment files in ``~/.irods/``: + $ python -m virtualenv ... + $ python -m pip install ... + + +Establishing a (secure) connection +---------------------------------- + +Using environment files (including any SSL settings) in ``~/.irods/``: >>> import os +>>> import ssl >>> from irods.session import iRODSSession >>> try: ... env_file = os.environ['IRODS_ENVIRONMENT_FILE'] ... except KeyError: ... env_file = os.path.expanduser('~/.irods/irods_environment.json') ... ->>> with iRODSSession(irods_env_file=env_file) as session: -... pass +>>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) +>>> ssl_settings = {'ssl_context': ssl_context} +>>> with iRODSSession(irods_env_file=env_file, **ssl_settings) as session: +... # workload ... >>> @@ -79,7 +96,7 @@ Passing iRODS credentials as keyword arguments: >>> from irods.session import iRODSSession >>> with iRODSSession(host='localhost', port=1247, user='bob', password='1234', zone='tempZone') as session: -... pass +... # workload ... 
>>> @@ -88,12 +105,76 @@ If you're an administrator acting on behalf of another user: >>> from irods.session import iRODSSession >>> with iRODSSession(host='localhost', port=1247, user='rods', password='1234', zone='tempZone', client_user='bob', client_zone='possibly_another_zone') as session: -... pass +... # workload ... >>> If no ``client_zone`` is provided, the ``zone`` parameter is used in its place. +A pure Python SSL session (without a local `env_file`) requires a few more things defined: + +>>> import ssl +>>> from irods.session import iRODSSession +>>> ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile='CERTNAME.crt', capath=None, cadata=None) +>>> ssl_settings = {'client_server_negotiation': 'request_server_negotiation', +... 'client_server_policy': 'CS_NEG_REQUIRE', +... 'encryption_algorithm': 'AES-256-CBC', +... 'encryption_key_size': 32, +... 'encryption_num_hash_rounds': 16, +... 'encryption_salt_size': 8, +... 'ssl_context': ssl_context} +>>> +>>> with iRODSSession(host='HOSTNAME_DEFINED_IN_CAFILE_ABOVE', port=1247, user='bob', password='1234', zone='tempZone', **ssl_settings) as session: +... # workload +>>> + + +Maintaining a connection +------------------------ + +The default library timeout for a connection to an iRODS Server is 120 seconds. + +This can be overridden by changing the session `connection_timeout` immediately after creation of the session object: + +>>> session.connection_timeout = 300 + +This will set the timeout to five minutes for any associated connections. + +Session objects and cleanup +--------------------------- + +When iRODSSession objects are kept as state in an application, spurious SYS_HEADER_READ_LEN_ERR errors +can sometimes be seen in the connected iRODS server's log file. This is frequently seen at program exit +because socket connections are terminated without having been closed out by the session object's +cleanup() method. 
+ +Starting with PRC Release 0.9.0, code has been included in the session object's __del__ method to call +cleanup(), properly closing out network connections. However, __del__ cannot be relied upon to run under all +circumstances (Python2 being more problematic), so an alternative may be to call session.cleanup() on +any session variable which might not be used again. + + +Simple PUTs and GETs +-------------------- + +We can use the just-created session object to put files to (or get them from) iRODS. + +>>> logical_path = "/{0.zone}/home/{0.username}/{1}".format(session,"myfile.dat") +>>> session.data_objects.put( "myfile.dat", logical_path) +>>> session.data_objects.get( logical_path, "/tmp/myfile.dat.copy" ) + +Note that local file paths may be relative, but iRODS data objects must always be referred to by +their absolute paths. This is in contrast to the ``iput`` and ``iget`` icommands, which keep +track of the current working collection (as modified by ``icd``) for the unix shell. + + +Parallel Transfer +----------------- + +Starting with release 0.9.0, data object transfers using put() and get() will spawn a number +of threads in order to optimize performance for iRODS server versions 4.2.9+ and file sizes +larger than a default threshold value of 32 Megabytes. + Working with collections ------------------------ @@ -184,26 +265,123 @@ bar Working with metadata --------------------- +To enumerate AVU's on an object. With no metadata attached, the result is an empty list: + + +>>> from irods.meta import iRODSMeta >>> obj = session.data_objects.get("/tempZone/home/rods/test1") >>> print(obj.metadata.items()) [] + +We then add some metadata. 
+Just as with the icommand equivalent "imeta add ...", we can add multiple AVU's with the same name field: + + >>> obj.metadata.add('key1', 'value1', 'units1') >>> obj.metadata.add('key1', 'value2') >>> obj.metadata.add('key2', 'value3') +>>> obj.metadata.add('key2', 'value4') +>>> print(obj.metadata.items()) +[, , +, ] + + +We can also use Python's item indexing syntax to perform the equivalent of an "imeta set ...", e.g. overwriting +all AVU's with a name field of "key2" in a single update: + + +>>> new_meta = iRODSMeta('key2','value5','units2') +>>> obj.metadata[new_meta.name] = new_meta >>> print(obj.metadata.items()) -[, , -] +[, , + ] + + +Now, with only one AVU on the object with a name of "key2", *get_one* is assured of not throwing an exception: ->>> print(obj.metadata.get_all('key1')) -[, ] >>> print(obj.metadata.get_one('key2')) - + + + +However, the same is not true of "key1": + + +>>> print(obj.metadata.get_one('key1')) +Traceback (most recent call last): + File "", line 1, in + File "/[...]/python-irodsclient/irods/meta.py", line 41, in get_one + raise KeyError +KeyError + + +Finally, to remove a specific AVU from an object: + >>> obj.metadata.remove('key1', 'value1', 'units1') >>> print(obj.metadata.items()) -[, ] +[, ] + + +Alternately, this form of the remove() method can also be useful: + + +>>> for avu in obj.metadata.items(): +... obj.metadata.remove(avu) +>>> print(obj.metadata.items()) +[] + + +If we intended on deleting the data object anyway, we could have just done this instead: + + +>>> obj.unlink(force=True) + + +But notice that the force option is important, since a data object in the trash may still have AVU's attached. + +At the end of a long session of AVU add/manipulate/delete operations, one should make sure to delete all unused +AVU's. 
We can in fact use any *\*Meta* data model in the queries below, since unattached AVU's are not aware +of the (type of) catalog object they once annotated: + + +>>> from irods.models import (DataObjectMeta, ResourceMeta) +>>> len(list( session.query(ResourceMeta) )) +4 +>>> from irods.test.helpers import remove_unused_metadata +>>> remove_unused_metadata(session) +>>> len(list( session.query(ResourceMeta) )) +0 + + +Atomic operations on metadata +----------------------------- + +With release 4.2.8 of iRODS, the atomic metadata API was introduced to allow a group of metadata add and remove +operations to be performed transactionally, within a single call to the server. This capability can be leveraged in +version 0.8.6 of the PRC. + +So, for example, if 'obj' is a handle to an object in the iRODS catalog (whether a data object, collection, user or +storage resource), we can send an arbitrary number of AVUOperation instances to be executed together as one indivisible +operation on that object: + +>>> from irods.meta import iRODSMeta, AVUOperation +>>> obj.metadata.apply_atomic_operations( AVUOperation(operation='remove', avu=iRODSMeta('a1','v1','these_units')), +... AVUOperation(operation='add', avu=iRODSMeta('a2','v2','those_units')), +... AVUOperation(operation='remove', avu=iRODSMeta('a3','v3')) # , ... +... ) + +The list of operations will be applied in the order given, so that a "remove" followed by an "add" of the same AVU +is, in effect, a metadata "set" operation. Also note that a "remove" operation will be ignored if the AVU value given +does not exist on the target object at that point in the sequence of operations. + +We can also source from a pre-built list of AVUOperations using Python's `f(*args_list)` syntax. For example, this +function uses the atomic metadata API to very quickly remove all AVUs from an object: + +>>> def remove_all_avus( Object ): +... avus_on_Object = Object.metadata.items() +... 
Object.metadata.apply_atomic_operations( *[AVUOperation(operation='remove', avu=i) for i in avus_on_Object] ) General queries @@ -235,6 +413,31 @@ General queries /tempZone/home/rods/manager/user_manager.py id=212669 size=5509 /tempZone/home/rods/manager/user_manager.pyc id=212658 size=5233 +Query using other models: + +>>> from irods.column import Criterion +>>> from irods.models import DataObject, DataObjectMeta, Collection, CollectionMeta +>>> from irods.session import iRODSSession +>>> import os +>>> env_file = os.path.expanduser('~/.irods/irods_environment.json') +>>> with iRODSSession(irods_env_file=env_file) as session: +... # by metadata +... # equivalent to 'imeta qu -C type like Project' +... results = session.query(Collection, CollectionMeta).filter( \ +... Criterion('=', CollectionMeta.name, 'type')).filter( \ +... Criterion('like', CollectionMeta.value, '%Project%')) +... for r in results: +... print(r[Collection.name], r[CollectionMeta.name], r[CollectionMeta.value], r[CollectionMeta.units]) +... +('/tempZone/home/rods', 'type', 'Project', None) + +Beginning with version 0.8.3 of PRC, the 'in' genquery operator is also available: + +>>> from irods.models import Resource +>>> from irods.column import In +>>> [ resc[Resource.id]for resc in session.query(Resource).filter(In(Resource.name, ['thisResc','thatResc'])) ] +[10037,10038] + Query with aggregation(min, max, sum, avg, count): >>> with iRODSSession(irods_env_file=env_file) as session: @@ -294,6 +497,7 @@ user_manager.py 212669 __init__.py 212670 __init__.pyc 212671 + Recherché queries ----------------- @@ -318,6 +522,262 @@ not reside in the trash. >>> pprint( list( chained_results ) ) +Instantiating iRODS objects from query results +---------------------------------------------- +The General query works well for getting information out of the ICAT if all we're interested in is +information representable with +primitive types (ie. object names, paths, and ID's, as strings or integers). 
But Python's object orientation also +allows us to create object references to mirror the persistent entities (instances of *Collection*, *DataObject*, *User*, or *Resource*, etc.) +inhabiting the ICAT. + +**Background:** +Certain iRODS object types can be instantiated easily using the session object's custom type managers, +particularly if some parameter (often just the name or path) of the object is already known: + +>>> type(session.users) + +>>> u = session.users.get('rods') +>>> u.id +10003 + +Type managers are good for specific operations, including object creation and removal:: + +>>> session.collections.create('/tempZone/home/rods/subColln') +>>> session.collections.remove('/tempZone/home/rods/subColln') +>>> session.data_objects.create('/tempZone/home/rods/dataObj') +>>> session.data_objects.unlink('/tempZone/home/rods/dataObj') + +When we retrieve a reference to an existing collection using *get* : + +>>> c = session.collections.get('/tempZone/home/rods') +>>> c + + + +we have, in that variable *c*, a reference to an iRODS *Collection* object whose properties provide +useful information: + +>>> [ x for x in dir(c) if not x.startswith('__') ] +['_meta', 'data_objects', 'id', 'manager', 'metadata', 'move', 'name', 'path', 'remove', 'subcollections', 'unregister', 'walk'] +>>> c.name +'rods' +>>> c.path +'/tempZone/home/rods' +>>> c.data_objects +[] +>>> c.metadata.items() +[ <... list of AVU's attached to Collection c ... > ] + +or whose methods can do useful things: + +>>> for sub_coll in c.walk(): print('---'); pprint( sub_coll ) +[ ...< series of Python data structures giving the complete tree structure below collection 'c'> ...] + +This approach of finding objects by name, or via their relations with other objects (ie "contained by", or in the case of metadata, "attached to"), +is helpful if we know something about the location or identity of what we're searching for, but we don't always +have that kind of a-priori knowledge. 
+ +So, although we can (as seen in the last example) walk an *iRODSCollection* recursively to discover all subordinate +collections and their data objects, this approach will not always be best +for a given type of application or data discovery, especially in more advanced +use cases. + +**A Different Approach:** +For the PRC to be sufficiently powerful for general use, we'll often need at least: + +* general queries, and +* the capabilities afforded by the PRC's object-relational mapping. + +Suppose, for example, we wish to enumerate all collections in the iRODS catalog. + +Again, the object managers are the answer, but they are now invoked using a different scheme: + +>>> from irods.collection import iRODSCollection; from irods.models import Collection +>>> all_collns = [ iRODSCollection(session.collections,result) for result in session.query(Collection) ] + +From there, we have the ability to do useful work, or filtering based on the results of the enumeration. +And, because *all_collns* is an iterable of true objects, we can either use Python's list comprehensions or +execute more catalog queries to achieve further aims. + +Note that, for similar system-wide queries of Data Objects (which, as it happens, are inextricably joined to their +parent Collection objects), a bit more finesse is required. 
Let us query, for example, to find all data +objects in a particular zone with an AVU that matches the following condition:: + + META_DATA_ATTR_NAME = "irods::alert_time" and META_DATA_ATTR_VALUE like '+0%' + + +>>> import irods.keywords +>>> from irods.data_object import iRODSDataObject +>>> from irods.models import DataObjectMeta, DataObject +>>> from irods.column import Like +>>> q = session.query(DataObject).filter( DataObjectMeta.name == 'irods::alert_time', + Like(DataObjectMeta.value, '+0%') ) +>>> zone_hint = "" # --> add a zone name in quotes to search another zone +>>> if zone_hint: q = q.add_keyword( irods.keywords.ZONE_KW, zone_hint ) +>>> for res in q: +... colln_id = res [DataObject.collection_id] +... collObject = get_collection( colln_id, session, zone = zone_hint) +... dataObject = iRODSDataObject( session.data_objects, parent = collObject, results=[res]) +... print( '{coll}/{data}'.format (coll = collObject.path, data = dataObject.name)) + + +In the above loop we have used a helper function, *get_collection*, to minimize the number of hits to the object +catalog. Otherwise, we might find within a typical application that some Collection objects are being queried at +a high rate of redundancy. *get_collection* can be implemented thusly: + +.. 
code:: Python + + import collections # of the Pythonic, not iRODS, kind + def makehash(): + # see https://stackoverflow.com/questions/651794/whats-the-best-way-to-initialize-a-dict-of-dicts-in-python + return collections.defaultdict(makehash) + from irods.collection import iRODSCollection + from irods.models import Collection + def get_collection (Id, session, zone=None, memo = makehash()): + if not zone: zone = "" + c_obj = memo[session][zone].get(Id) + if c_obj is None: + q = session.query(Collection).filter(Collection.id==Id) + if zone != '': q = q.add_keyword( irods.keywords.ZONE_KW, zone ) + c_id = q.one() + c_obj = iRODSCollection(session, result = c_id) + memo[session][zone][Id] = c_obj + return c_obj + + +Once instantiated, of course, any *iRODSDataObject*'s data to which we have access permissions is available via its open() method. + +As stated, this type of object discovery requires some extra study and effort, but the ability to search arbitrary iRODS zones +(to which we are federated and have the user permissions) is powerful indeed. + + +Tracking and manipulating replicas of Data objects +-------------------------------------------------- + +Putting together the techniques we've seen so far, it's not hard to write functions +that achieve useful, common goals. Suppose that for all data objects containing replicas on +a given named resource (the "source") we want those replicas "moved" to a second, or +"destination" resource. We can achieve it with a function such as the one below. It +achieves the move via a replication of the data objects found to the destination +resource , followed by a trimming of each replica from the source. 
We assume for our current +purposes that all replicas are "good", ie have a status of "1" :: + + from irods.resource import iRODSResource + from irods.collection import iRODSCollection + from irods.data_object import iRODSDataObject + from irods.models import Resource,Collection,DataObject + def repl_and_trim (srcRescName, dstRescName = '', verbose = False): + objects_trimmed = 0 + q = session.query(Resource).filter(Resource.name == srcRescName) + srcResc = iRODSResource( session.resources, q.one()) + # loop over data objects found on srcResc + for q_row in session.query(Collection,DataObject) \ + .filter(DataObject.resc_id == srcResc.id): + collection = iRODSCollection (session.collections, result = q_row) + data_object = iRODSDataObject (session.data_objects, parent = collection, results = (q_row,)) + objects_trimmed += 1 + if verbose : + import pprint + print( '--------', data_object.name, '--------') + pprint.pprint( [vars(r) for r in data_object.replicas if + r.resource_name == srcRescName] ) + if dstRescName: + objects_trimmed += 1 + data_object.replicate(dstRescName) + for replica_number in [r.number for r in data_object.replicas]: + options = { kw.DATA_REPL_KW: replica_number } + data_object.unlink( **options ) + return objects_trimmed + + +Listing Users and Groups ; calculating Group Membership +------------------------------------------------------- + +iRODS tracks groups and users using two tables, R_USER_MAIN and R_USER_GROUP. 
+Under this database schema, all "user groups" are also users: + +>>> from irods.models import User, UserGroup +>>> from pprint import pprint +>>> pprint(list( [ (x[User.id], x[User.name]) for x in session.query(User) ] )) +[(10048, 'alice'), + (10001, 'rodsadmin'), + (13187, 'bobby'), + (10045, 'collab'), + (10003, 'rods'), + (13193, 'empty'), + (10002, 'public')] + +But it's also worth noting that the User.type field will be 'rodsgroup' for any +user ID that iRODS internally recognizes as a "Group": + +>>> groups = session.query(User).filter( User.type == 'rodsgroup' ) + +>>> [x[User.name] for x in groups] +['collab', 'public', 'rodsadmin', 'empty'] + +Since we can instantiate iRODSUserGroup and iRODSUser objects directly from the rows of +a general query on the corresponding tables, it is also straightforward to trace out +the groups' memberships: + +>>> from irods.user import iRODSUser, iRODSUserGroup +>>> grp_usr_mapping = [ (iRODSUserGroup ( session.user_groups, result), iRODSUser (session.users, result)) \ +... for result in session.query(UserGroup,User) ] +>>> pprint( [ (x,y) for x,y in grp_usr_mapping if x.id != y.id ] ) +[(, ), + (, ), + (, ), + (, ), + (, ), + (, )] + +(Note that in general queries, fields cannot be compared to each other, only to literal constants; thus +the '!=' comparison in the Python list comprehension.) + +From the above, we can see that the group 'collab' (with user ID 10045) contains users 'bobby'(13187) and +'alice'(10048) but not 'rods'(10003), as the tuple (10045,10003) is not listed. Group 'rodsadmin'(10001) +contains user 'rods'(10003) but no other users; and group 'public'(10002) by default contains all canonical +users (those whose User.type is 'rodsadmin' or 'rodsuser'). The empty group ('empty') has no users as +members, so it doesn't show up in our final list. 
+ + +Getting and setting permissions +------------------------------- + +We can find the ID's of all the collections writable (ie having "modify" ACL) by, but not owned by, +alice (or even alice#otherZone): + +>>> from irods.models import Collection,CollectionAccess,CollectionUser,User +>>> from irods.column import Like +>>> q = session.query (Collection,CollectionAccess).filter( +... CollectionUser.name == 'alice', # User.zone == 'otherZone', # zone optional +... Like(CollectionAccess.name, 'modify%') ) #defaults to current zone + +If we then want to downgrade those permissions to read-only, we can do the following: + +>>> from irods.access import iRODSAccess +>>> for c in q: +... session.permissions.set( iRODSAccess('read', c[Collection.name], 'alice', # 'otherZone' # zone optional +... )) + +We can also query on access type using its numeric value, which will seem more natural to some: + +>>> OWN = 1200; MODIFY = 1120 ; READ = 1050 +>>> from irods.models import DataAccess, DataObject, User +>>> data_objects_writable = list(session.query(DataObject,DataAccess,User).filter(User.name=='alice', DataAccess.type >= MODIFY)) + + +Managing users +-------------- + +You can create a user in the current zone (with an optional auth_str): + +>>> session.users.create('user', 'rodsuser', 'MyZone', auth_str) + +If you want to create a user in a federated zone, use: + +>>> session.users.create('user', 'rodsuser', 'OtherZone', auth_str) + + And more... 
----------- diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..0aa372d --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,36 @@ +version: '3' +services: + + icat: + image: postgres:10 + environment: + - POSTGRES_HOST_AUTH_METHOD=md5 + - POSTGRES_PASSWORD=pg_password + + irods-provider: + hostname: irods-provider + build: + context: docker_build + dockerfile: Dockerfile.provider + volumes: + - "${irods_pkg_dir}:/irods_packages:ro" + - ./irods_shared:/irods_shared:rw + depends_on: + - icat + networks: + default: + aliases: + - irods-provider + + client-runner: + env_file: client-runner.env + volumes: + - ./irods_shared:/irods_shared:rw + build: + context: . + dockerfile: Dockerfile.prc_test.${client_os_generic} + args: + os_image: "$client_os_image" + py_N: "$python_version" + depends_on: + - irods-provider diff --git a/docker_build/Dockerfile.provider b/docker_build/Dockerfile.provider new file mode 100644 index 0000000..f901dc1 --- /dev/null +++ b/docker_build/Dockerfile.provider @@ -0,0 +1,41 @@ +FROM ubuntu:18.04 + +ARG irods_pkg_dir + +RUN apt update +RUN apt install -y wget sudo lsb-release apt-transport-https gnupg2 postgresql-client +RUN wget -qO - https://packages.irods.org/irods-signing-key.asc | sudo apt-key add - +RUN echo "deb [arch=amd64] https://packages.irods.org/apt/ $(lsb_release -sc) main" | sudo tee /etc/apt/sources.list.d/renci-irods.list +RUN apt update + +SHELL [ "/bin/bash","-c" ] + +COPY ICAT.sql /tmp +COPY pgpass root/.pgpass +RUN chmod 600 root/.pgpass + +RUN apt install -y rsyslog gawk +ADD build_deps_list wait_on_condition send_oneshot /tmp/ + +# At Runtime: 1. Install apt dependencies for the iRODS package files given. +# 2. Install the package files. +# 3. Wait on database container. +# 4. Configure iRODS provider and make sure it is running. +# 5. Open a server port, informing the client to start tests now that iRODS is up. +# 6. Configure shared folder for tests that need to register data objects. 
+# (We opt out if /irods_shared does not exist, ie is omitted in the docker-compose.yml). +# 7. Wait forever. + +CMD apt install -y $(/tmp/build_deps_list /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb) && \ + dpkg -i /irods_packages/irods*{serv,dev,icommand,runtime,database-*postgres}*.deb && \ + /tmp/wait_on_condition -i 5 -n 12 "psql -h icat -U postgres -c '\\l' >/dev/null" && \ + psql -h icat -U postgres -f /tmp/ICAT.sql && \ + sed 's/localhost/icat/' < /var/lib/irods/packaging/localhost_setup_postgres.input \ + | python /var/lib/irods/scripts/setup_irods.py && \ + { pgrep -u irods irodsServer >/dev/null || su irods -c '~/irodsctl start'; \ + env PORT=8888 /tmp/send_oneshot "iRODS is running..." & } && \ + { [ ! -d /irods_shared ] || { mkdir -p /irods_shared/reg_resc && mkdir -p /irods_shared/tmp && \ + chown -R irods.irods /irods_shared && chmod g+ws /irods_shared/tmp && \ + chmod 777 /irods_shared/reg_resc ; } } && \ + echo $'*********\n' $'*********\n' $'*********\n' $'*********\n' $'*********\n' IRODS IS UP && \ + tail -f /dev/null diff --git a/docker_build/ICAT.sql b/docker_build/ICAT.sql new file mode 100644 index 0000000..abb706a --- /dev/null +++ b/docker_build/ICAT.sql @@ -0,0 +1,3 @@ +CREATE USER irods WITH PASSWORD 'testpassword'; +CREATE DATABASE "ICAT"; +GRANT ALL PRIVILEGES ON DATABASE "ICAT" TO irods; diff --git a/docker_build/build_deps_list b/docker_build/build_deps_list new file mode 100755 index 0000000..7bf3798 --- /dev/null +++ b/docker_build/build_deps_list @@ -0,0 +1,22 @@ +#!/bin/bash + +build_deps_list() +{ + local -A pkglist + local pkg + while [ $# -gt 0 ] + do + while read f + do + if [[ ! $f =~ \(.*\)\s*$ ]]; then # todo: include version-specific ? 
+ pkglist["$f"]="" + fi + done < <(dpkg -I "$1"|grep -i '^ *depends:'|tr ',:' \\n | tail -n +2) + shift + done + for pkg in "${!pkglist[@]}" # package list de-duped by associative array + do + echo "$pkg" + done +} +build_deps_list "$@" diff --git a/docker_build/iinit.py b/docker_build/iinit.py new file mode 100644 index 0000000..81365d8 --- /dev/null +++ b/docker_build/iinit.py @@ -0,0 +1,44 @@ +from getpass import getpass +from irods.password_obfuscation import encode +import json +import os +import sys +from os import chmod +from os.path import expanduser,exists,join +from getopt import getopt + + +home_env_path = expanduser('~/.irods') +env_file_path = join(home_env_path,'irods_environment.json') +auth_file_path = join(home_env_path,'.irodsA') + + +def do_iinit(host, port, user, zone, password): + if not exists(home_env_path): + os.makedirs(home_env_path) + else: + raise RuntimeError('~/.irods already exists') + + with open(env_file_path,'w') as env_file: + json.dump ( { "irods_host": host, + "irods_port": int(port), + "irods_user_name": user, + "irods_zone_name": zone }, env_file, indent=4) + with open(auth_file_path,'w') as auth_file: + auth_file.write(encode(password)) + chmod (auth_file_path,0o600) + + +def get_kv_pairs_from_cmdline(*args): + arglist = list(args) + while arglist: + k = arglist.pop(0) + v = arglist.pop(0) + yield k,v + + +if __name__ == '__main__': + import sys + args = sys.argv[1:] + dct = {k:v for k,v in get_kv_pairs_from_cmdline(*args)} + do_iinit(**dct) diff --git a/docker_build/pgpass b/docker_build/pgpass new file mode 100644 index 0000000..55a6bdf --- /dev/null +++ b/docker_build/pgpass @@ -0,0 +1 @@ +icat:5432:postgres:postgres:pg_password diff --git a/docker_build/recv_oneshot b/docker_build/recv_oneshot new file mode 100755 index 0000000..47e2bdd --- /dev/null +++ b/docker_build/recv_oneshot @@ -0,0 +1,35 @@ +#!/usr/bin/env python +from __future__ import print_function +import sys, os, time +from socket import * +import getopt + 
+def try_connect(host,port): + try: + s=socket(AF_INET,SOCK_STREAM) + s.connect((host,port)) + return s + except: + s.close() + return None + +# Options: +# +# -t timeout +# -h host +# -p port + +t = now = time.time() +opts = dict(getopt.getopt(sys.argv[1:],'t:h:p:')[0]) + +host = opts['-h'] +port = int(opts['-p']) +timeout = float(opts['-t']) + +while time.time() < now + timeout: + time.sleep(1) + s = try_connect(host, port) + if s: + print(s.recv(32767).decode('utf-8'),end='') + exit(0) +exit(1) diff --git a/docker_build/send_oneshot b/docker_build/send_oneshot new file mode 100755 index 0000000..b265af1 --- /dev/null +++ b/docker_build/send_oneshot @@ -0,0 +1,6 @@ +#!/usr/bin/gawk -f +BEGIN { + SERVER = "/inet/tcp/"ENVIRON["PORT"]"/0/0" + print ARGV[1] " - " strftime() |& SERVER + close(SERVER) +} diff --git a/docker_build/wait_on_condition b/docker_build/wait_on_condition new file mode 100755 index 0000000..ce2c29b --- /dev/null +++ b/docker_build/wait_on_condition @@ -0,0 +1,34 @@ +#!/bin/bash + +# wait for a program to run with 0 return status + +interval=3; ntimes=20; verbose="" + +usage() { + echo "$0 [options] " + printf "\t options are: -i (default %d)\n" $interval + printf "\t -n (default %d)\n" $ntimes + printf "\t -v : for verbose reporting\n" + exit 1 +} >&2 + +while [[ "$1" = -* ]] ; do + case $1 in + -i) shift; interval=$1; shift ;; + -n) shift; ntimes=$1; shift ;; + -v) verbose=1 ; shift;; + *) usage;; + esac +done +[ $# -eq 0 ] && usage + +n=1 +while : ; do + eval "$@" + STATUS=$? 
+ [ -n "$verbose" ] && echo "$n:" 'STATUS =' $STATUS `date` + [ $((++n)) -gt $ntimes -o $STATUS -eq 0 ] && break + sleep $interval +done + +exit $STATUS diff --git a/irods/__init__.py b/irods/__init__.py index 4f43c19..d88d0d4 100644 --- a/irods/__init__.py +++ b/irods/__init__.py @@ -1,5 +1,32 @@ from .version import __version__ +import logging +logger = logging.getLogger(__name__) +logger.addHandler(logging.NullHandler()) +gHandler = None + +def client_logging(flag=True,handler=None): + """ + Example of use: + + import irods + # Enable / Disable general client logging + irods.client_logging(True[,handler]) -> handler + # (handler is a StreamHandler to stderr by default) + irods.client_logging(False) # - disable irods client logging + """ + global gHandler + if flag: + if handler is not None: + if gHandler: logger.removeHandler(gHandler) + if not handler: handler = logging.StreamHandler() + gHandler = handler + logger.addHandler(handler) + else: + if gHandler: logger.removeHandler(gHandler) + gHandler = None + return gHandler + # Magic Numbers MAX_PASSWORD_LENGTH = 50 MAX_SQL_ATTR = 50 @@ -10,8 +37,16 @@ MAX_SQL_ROWS = 256 DEFAULT_CONNECTION_TIMEOUT = 120 -# Other variables AUTH_SCHEME_KEY = 'a_scheme' +AUTH_USER_KEY = 'a_user' +AUTH_PWD_KEY = 'a_pw' +AUTH_TTL_KEY = 'a_ttl' + +NATIVE_AUTH_SCHEME = 'native' + GSI_AUTH_PLUGIN = 'GSI' GSI_AUTH_SCHEME = GSI_AUTH_PLUGIN.lower() GSI_OID = "1.3.6.1.4.1.3536.1.1" # taken from http://j.mp/2hDeczm + +PAM_AUTH_PLUGIN = 'PAM' +PAM_AUTH_SCHEME = PAM_AUTH_PLUGIN.lower() diff --git a/irods/api_number.py b/irods/api_number.py index a221d4a..91bb432 100644 --- a/irods/api_number.py +++ b/irods/api_number.py @@ -176,4 +176,7 @@ # 1100 - 1200 - SSL API calls "SSL_START_AN": 1100, "SSL_END_AN": 1101, + "ATOMIC_APPLY_METADATA_OPERATIONS_APN": 20002, + "GET_FILE_DESCRIPTOR_INFO_APN": 20000, + "REPLICA_CLOSE_APN": 20004 } diff --git a/irods/collection.py b/irods/collection.py index c750f29..23982c3 100644 --- a/irods/collection.py +++ 
b/irods/collection.py @@ -15,6 +15,11 @@ def __init__(self, manager, result=None): self.id = result[Collection.id] self.path = result[Collection.name] self.name = irods_basename(result[Collection.name]) + self.create_time = result[Collection.create_time] + #self.modify_time = result[Collection.modify_time] + #self.inheritance = result[Collection.inheritance] + #self.owner_name = result[Collection.owner_name] + #self.owner_zone = result[Collection.owner_zone] self._meta = None @property diff --git a/irods/column.py b/irods/column.py index f4f644f..cfed46b 100644 --- a/irods/column.py +++ b/irods/column.py @@ -1,4 +1,5 @@ from __future__ import absolute_import +import six from datetime import datetime from calendar import timegm @@ -38,6 +39,20 @@ def __init__(self, op, query_key, value): def value(self): return self.query_key.column_type.to_irods(self._value) +class In(Criterion): + + def __init__(self, query_key, value): + super(In, self).__init__('in', query_key, value) + + @property + def value(self): + v = "(" + comma = "" + for element in self._value: + v += "{}'{}'".format(comma,element) + comma = "," + v += ")" + return v class Like(Criterion): @@ -113,6 +128,12 @@ def to_python(string): @staticmethod def to_irods(data): + try: + # Convert to Unicode string (aka decode) + data = six.text_type(data, 'utf-8', 'replace') + except TypeError: + # Some strings are already Unicode so they do not need decoding + pass return u"'{}'".format(data) diff --git a/irods/connection.py b/irods/connection.py index 8d791ea..53fa954 100644 --- a/irods/connection.py +++ b/irods/connection.py @@ -4,19 +4,24 @@ import struct import hashlib import six -import struct import os import ssl import hashlib +import datetime +import irods.password_obfuscation as obf +from ast import literal_eval as safe_eval from irods.message import ( - iRODSMessage, StartupPack, AuthResponse, AuthChallenge, + iRODSMessage, StartupPack, AuthResponse, AuthChallenge, AuthPluginOut, OpenedDataObjRequest, 
FileSeekResponse, StringStringMap, VersionResponse, - GSIAuthMessage, OpenIDAuthMessage, ClientServerNegotiation, Error) + OpenIDAuthMessage, ClientServerNegotiation, Error, PluginAuthMessage, GetTempPasswordOut) from irods.exception import get_exception_by_code, NetworkException from irods import ( MAX_PASSWORD_LENGTH, RESPONSE_LEN, - AUTH_SCHEME_KEY, GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID) + AUTH_SCHEME_KEY, AUTH_USER_KEY, AUTH_PWD_KEY, AUTH_TTL_KEY, + NATIVE_AUTH_SCHEME, + GSI_AUTH_PLUGIN, GSI_AUTH_SCHEME, GSI_OID, + PAM_AUTH_SCHEME) from irods.client_server_negotiation import ( perform_negotiation, validate_policy, @@ -46,8 +51,12 @@ def __init__(self, URL): def is_str(s): return isinstance(s, basestring) +class PlainTextPAMPasswordError(Exception): pass + class Connection(object): + DISALLOWING_PAM_PLAINTEXT = True + def __init__(self, pool, account, block_on_authURL=True): self.pool = pool @@ -56,29 +65,35 @@ def __init__(self, pool, account, block_on_authURL=True): self._client_signature = None self._server_version = self._connect() self.block_on_authURL=block_on_authURL + self._disconnected = False scheme = self.account.authentication_scheme - if scheme == 'native': + if scheme == NATIVE_AUTH_SCHEME: self._login_native() - elif scheme == 'gsi': + elif scheme == GSI_AUTH_SCHEME: self.client_ctx = None self._login_gsi() elif scheme == 'openid': self._login_openid() + elif scheme == PAM_AUTH_SCHEME: + self._login_pam() else: raise ValueError("Unknown authentication scheme %s" % scheme) + self.create_time = datetime.datetime.now() + self.last_used_time = self.create_time @property def server_version(self): - return tuple(int(x) for x in self._server_version.relVersion.replace('rods', '').split('.')) - + detected = tuple(int(x) for x in self._server_version.relVersion.replace('rods', '').split('.')) + return (safe_eval(os.environ.get('IRODS_SERVER_VERSION','()')) + or detected) @property def client_signature(self): return self._client_signature def 
__del__(self): - if self.socket: + if self.socket and getattr(self,"_disconnected",False): self.disconnect() def send(self, message): @@ -202,15 +217,18 @@ def _connect(self): try: s = socket.create_connection(address, timeout) + self._disconnected = False except socket.error: raise NetworkException( "Could not connect to specified host and port: " + "{}:{}".format(*address)) self.socket = s + main_message = StartupPack( (self.account.proxy_user, self.account.proxy_zone), - (self.account.client_user, self.account.client_zone) + (self.account.client_user, self.account.client_zone), + self.pool.application_name ) # No client-server negotiation @@ -278,6 +296,7 @@ def disconnect(self): self.socket.shutdown(socket.SHUT_RDWR) self.socket.close() self.socket = None + self._disconnected = True def recvall(self, n): # Helper function to recv n bytes or return None if EOF is hit @@ -353,9 +372,10 @@ def handshake(self, target): def gsi_client_auth_request(self): # Request for authentication with GSI on current user - message_body = GSIAuthMessage( + + message_body = PluginAuthMessage( auth_scheme_=GSI_AUTH_PLUGIN, - context_='a_user=%s' % self.account.client_user + context_='%s=%s' % (AUTH_USER_KEY, self.account.client_user) ) # GSI = 1201 # https://github.com/irods/irods/blob/master/lib/api/include/apiNumber.h#L158 @@ -520,6 +540,48 @@ def read_msg(sock): # no point trying an auth reponse if it failed logger.error('Did not complete OpenID authentication flow') + def _login_pam(self): + + ctx_user = '%s=%s' % (AUTH_USER_KEY, self.account.client_user) + ctx_pwd = '%s=%s' % (AUTH_PWD_KEY, self.account.password) + ctx_ttl = '%s=%s' % (AUTH_TTL_KEY, "60") + + ctx = ";".join([ctx_user, ctx_pwd, ctx_ttl]) + + if type(self.socket) is socket.socket: + if getattr(self,'DISALLOWING_PAM_PLAINTEXT',True): + raise PlainTextPAMPasswordError + + message_body = PluginAuthMessage( + auth_scheme_=PAM_AUTH_SCHEME, + context_=ctx + ) + + auth_req = iRODSMessage( + msg_type='RODS_API_REQ', + 
msg=message_body, + # int_info=725 + int_info=1201 + ) + + self.send(auth_req) + # Getting the new password + output_message = self.recv() + + auth_out = output_message.get_main_message(AuthPluginOut) + + self.disconnect() + self._connect() + + if hasattr(self.account,'store_pw'): + drop = self.account.store_pw + if type(drop) is list: + drop[:] = [ auth_out.result_ ] + + self._login_native(password=auth_out.result_) + + logger.info("PAM authorization validated") + def read_file(self, desc, size=-1, buffer=None): if size < 0: size = len(buffer) @@ -547,7 +609,11 @@ def read_file(self, desc, size=-1, buffer=None): return response.bs - def _login_native(self): + def _login_native(self, password=None): + + # Default case, PAM login will send a new password + if password is None: + password = self.account.password # authenticate auth_req = iRODSMessage(msg_type='RODS_API_REQ', int_info=703) @@ -569,11 +635,11 @@ def _login_native(self): if six.PY3: challenge = challenge.strip() padded_pwd = struct.pack( - "%ds" % MAX_PASSWORD_LENGTH, self.account.password.encode( + "%ds" % MAX_PASSWORD_LENGTH, password.encode( 'utf-8').strip()) else: padded_pwd = struct.pack( - "%ds" % MAX_PASSWORD_LENGTH, self.account.password) + "%ds" % MAX_PASSWORD_LENGTH, password) m = hashlib.md5() m.update(challenge) @@ -643,3 +709,16 @@ def close_file(self, desc, **options): self.send(message) self.recv() + + def temp_password(self): + request = iRODSMessage("RODS_API_REQ", msg=None, + int_info=api_number['GET_TEMP_PASSWORD_AN']) + + # Send and receive request + self.send(request) + response = self.recv() + logger.debug(response.int_info) + + # Convert and return answer + msg = response.get_main_message(GetTempPasswordOut) + return obf.create_temp_password(msg.stringToHashWith, self.account.password) diff --git a/irods/data_object.py b/irods/data_object.py index c0a336c..0760f27 100644 --- a/irods/data_object.py +++ b/irods/data_object.py @@ -3,13 +3,18 @@ import sys import logging import six 
+import os +import ast from irods.models import DataObject from irods.meta import iRODSMetaCollection import irods.keywords as kw +from irods.api_number import api_number +from irods.message import (JSON_Message, iRODSMessage) logger = logging.getLogger(__name__) +IRODS_SERVER_WITH_CLOSE_REPLICA_API = (4,2,9) def chunks(f, chunksize=io.DEFAULT_BUFFER_SIZE): return iter(lambda: f.read(chunksize), b'') @@ -23,11 +28,12 @@ def irods_basename(path): class iRODSReplica(object): - def __init__(self, number, status, resource_name, path, **kwargs): + def __init__(self, number, status, resource_name, path, resc_hier, **kwargs): self.number = number self.status = status self.resource_name = resource_name self.path = path + self.resc_hier = resc_hier for key, value in kwargs.items(): setattr(self, key, value) @@ -61,10 +67,15 @@ def __init__(self, manager, parent=None, results=None): r[DataObject.replica_status], r[DataObject.resource_name], r[DataObject.path], - checksum=r[DataObject.checksum] + r[DataObject.resc_hier], + checksum=r[DataObject.checksum], + size=r[DataObject.size] ) for r in replicas] self._meta = None + + + def __repr__(self): return "".format(**vars(self)) @@ -75,11 +86,11 @@ def metadata(self): self.manager.sess.metadata, DataObject, self.path) return self._meta - def open(self, mode='r', **options): - if kw.DEST_RESC_NAME_KW not in options: - options[kw.DEST_RESC_NAME_KW] = self.replicas[0].resource_name + def open(self, mode='r', finalize_on_close = True, **options): + return self.manager.open(self.path, mode, finalize_on_close = finalize_on_close, **options) - return self.manager.open(self.path, mode, **options) + def chksum(self, **options): + return self.manager.chksum(self.path, **options) def unlink(self, force=False, **options): self.manager.unlink(self.path, force, **options) @@ -98,13 +109,61 @@ def replicate(self, resource=None, **options): class iRODSDataObjectFileRaw(io.RawIOBase): - def __init__(self, conn, descriptor, **options): + """The 
raw object supporting file-like operations (read/write/seek) for the + iRODSDataObject.""" + + def __init__(self, conn, descriptor, finalize_on_close = True, **options): + """ + Constructor needs a connection and an iRODS data object descriptor. If the + finalize_on_close flag evaluates False, close() will invoke the REPLICA_CLOSE + API instead of closing and finalizing the object (useful for parallel + transfers using multiple threads). + """ + super(iRODSDataObjectFileRaw,self).__init__() self.conn = conn self.desc = descriptor self.options = options + self.finalize_on_close = finalize_on_close + + def replica_access_info(self): + message_body = JSON_Message( {'fd': self.desc}, + server_version = self.conn.server_version ) + message = iRODSMessage('RODS_API_REQ', msg = message_body, + int_info=api_number['GET_FILE_DESCRIPTOR_INFO_APN']) + self.conn.send(message) + result = None + try: + result = self.conn.recv() + except Exception as e: + logger.warning('''Couldn't receive or process response to GET_FILE_DESCRIPTOR_INFO_APN -- ''' + '''caught: %r''',e) + raise + dobj_info = result.get_json_encoded_struct() + replica_token = dobj_info.get("replica_token","") + resc_hier = ( dobj_info.get("data_object_info") or {} ).get("resource_hierarchy","") + return (replica_token, resc_hier) + + def _close_replica(self): + server_version = ast.literal_eval(os.environ.get('IRODS_VERSION_OVERRIDE', '()' )) + if (server_version or self.conn.server_version) < IRODS_SERVER_WITH_CLOSE_REPLICA_API: return False + message_body = JSON_Message( { "fd": self.desc, + "send_notification": False, + "update_size": False, + "update_status": False, + "compute_checksum": False }, + server_version = self.conn.server_version ) + self.conn.send( iRODSMessage('RODS_API_REQ', msg = message_body, + int_info=api_number['REPLICA_CLOSE_APN']) ) + try: + self.conn.recv().int_info + except Exception: + logger.warning ('** ERROR on closing replica **') + raise + return True def close(self): - 
self.conn.close_file(self.desc, **self.options) + if self.finalize_on_close or not self._close_replica(): + self.conn.close_file(self.desc, **self.options) self.conn.release() super(iRODSDataObjectFileRaw, self).close() return None diff --git a/irods/exception.py b/irods/exception.py index a1976a1..9617929 100644 --- a/irods/exception.py +++ b/irods/exception.py @@ -4,6 +4,8 @@ from __future__ import absolute_import import six + + class PycommandsException(Exception): pass @@ -24,6 +26,10 @@ class CollectionDoesNotExist(DoesNotExist): pass +class ZoneDoesNotExist(DoesNotExist): + pass + + class UserDoesNotExist(DoesNotExist): pass @@ -504,6 +510,10 @@ class SYS_RESC_QUOTA_EXCEEDED(SystemException): code = -110000 +class SYS_INVALID_INPUT_PARAM(SystemException): + code = -130000 + + class UserInputException(iRODSException): pass @@ -1131,6 +1141,9 @@ class CAT_TABLE_ACCESS_DENIED(CatalogLibraryException): class CAT_UNKNOWN_SPECIFIC_QUERY(CatalogLibraryException): code = -853000 +class CAT_STATEMENT_TABLE_FULL(CatalogLibraryException): + code = -860000 + class RDSException(iRODSException): pass @@ -1874,3 +1887,23 @@ class PHP_REQUEST_STARTUP_ERR(PHPException): class PHP_OPEN_SCRIPT_FILE_ERR(PHPException): code = -1602000 + + +class PAMException(iRODSException): + pass + + +class PAM_AUTH_NOT_BUILT_INTO_CLIENT(PAMException): + code = -991000 + + +class PAM_AUTH_NOT_BUILT_INTO_SERVER(PAMException): + code = -992000 + + +class PAM_AUTH_PASSWORD_FAILED(PAMException): + code = -993000 + + +class PAM_AUTH_PASSWORD_INVALID_TTL(PAMException): + code = -994000 diff --git a/irods/keywords.py b/irods/keywords.py index d70ac24..c49e907 100644 --- a/irods/keywords.py +++ b/irods/keywords.py @@ -13,7 +13,7 @@ RESC_NAME_KW = "rescName" # resource name # DEST_RESC_NAME_KW = "destRescName" # destination resource name # DEF_RESC_NAME_KW = "defRescName" # default resource name # -BACKUP_RESC_NAME_KW = "backupRescName" # destination resource name # +BACKUP_RESC_NAME_KW = 
"backupRescName" # backup resource name # DATA_TYPE_KW = "dataType" # data type # DATA_SIZE_KW = "dataSize" CHKSUM_KW = "chksum" @@ -210,6 +210,7 @@ # =-=-=-=-=-=-=- # irods general keywords definitions RESC_HIER_STR_KW = "resc_hier" +REPLICA_TOKEN_KW = "replicaToken" DEST_RESC_HIER_STR_KW = "dest_resc_hier" IN_PDMO_KW = "in_pdmo" STAGE_OBJ_KW = "stage_object" diff --git a/irods/manager/__init__.py b/irods/manager/__init__.py index 9ad1dcf..09c184c 100644 --- a/irods/manager/__init__.py +++ b/irods/manager/__init__.py @@ -1,4 +1,15 @@ class Manager(object): + __server_version = () + + @property + def server_version(self): + if not self.__server_version: + p = self.sess.pool + if p is None : raise RuntimeError ("session not configured") + conn = getattr(p,"_conn",None) or p.get_connection() + if conn: self.__server_version = conn.server_version + return tuple( self.__server_version ) + def __init__(self, sess): self.sess = sess diff --git a/irods/manager/access_manager.py b/irods/manager/access_manager.py index 8276e5d..20ec57e 100644 --- a/irods/manager/access_manager.py +++ b/irods/manager/access_manager.py @@ -4,20 +4,38 @@ from irods.manager import Manager from irods.api_number import api_number from irods.message import ModAclRequest, iRODSMessage -from irods.data_object import iRODSDataObject +from irods.data_object import ( iRODSDataObject, irods_dirname, irods_basename ) from irods.collection import iRODSCollection -from irods.models import ( - DataObject, Collection, User, DataAccess, CollectionAccess, CollectionUser) +from irods.models import ( DataObject, Collection, User, CollectionUser, + DataAccess, CollectionAccess ) from irods.access import iRODSAccess +from irods.column import In +from irods.user import iRODSUser +import six import logging logger = logging.getLogger(__name__) +def users_by_ids(session,ids=()): + try: + ids=list(iter(ids)) + except TypeError: + if type(ids) in (str,) + six.integer_types: ids=int(ids) + else: raise + cond = () if not 
ids \ + else (In(User.id,list(map(int,ids))),) if len(ids)>1 \ + else (User.id == int(ids[0]),) + return [ iRODSUser(session.users,i) + for i in session.query(User.id,User.name,User.type,User.zone).filter(*cond) ] class AccessManager(Manager): - def get(self, target): + def get(self, target, report_raw_acls = False, **kw): + + if report_raw_acls: + return self.__get_raw(target, **kw) # prefer a behavior consistent with 'ils -A` + # different query whether target is an object or a collection if type(target) == iRODSDataObject: access_type = DataAccess @@ -45,6 +63,56 @@ def get(self, target): user_zone=row[user_type.zone] ) for row in results] + def coll_access_query(self,path): + return self.sess.query(Collection, CollectionAccess).filter(Collection.name == path) + + def data_access_query(self,path): + cn = irods_dirname(path) + dn = irods_basename(path) + return self.sess.query(DataObject, DataAccess).filter( Collection.name == cn, DataObject.name == dn ) + + def __get_raw(self, target, **kw): + + ### sample usage: ### + # + # user_id_list = [] # simply to store the user id's from the discovered ACL's + # session.permissions.get( data_or_coll_target, report_raw_acls = True, + # acl_users = user_id_list, + # acl_users_transform = lambda u: u.id) + # + # -> returns list of iRODSAccess objects mapping one-to-one with ACL's stored in the catalog + + users_out = kw.pop( 'acl_users', None ) + T = kw.pop( 'acl_users_transform', lambda value : value ) + + # different choice of query based on whether target is an object or a collection + if isinstance(target, iRODSDataObject): + access_column = DataAccess + query_func = self.data_access_query + + elif isinstance(target, iRODSCollection): + access_column = CollectionAccess + query_func = self.coll_access_query + else: + raise TypeError + + rows = [ r for r in query_func(target.path) ] + userids = set( r[access_column.user_id] for r in rows ) + + user_lookup = { j.id:j for j in users_by_ids(self.sess, userids) } + + if 
isinstance(users_out, dict): users_out.update (user_lookup) + elif isinstance (users_out, list): users_out += [T(v) for v in user_lookup.values()] + elif isinstance (users_out, set): users_out |= set(T(v) for v in user_lookup.values()) + elif users_out is None: pass + else: raise TypeError + + acls = [ iRODSAccess ( r[access_column.name], + target.path, + user_lookup[r[access_column.user_id]].name, + user_lookup[r[access_column.user_id]].zone ) for r in rows ] + return acls + def set(self, acl, recursive=False, admin=False): prefix = 'admin:' if admin else '' diff --git a/irods/manager/data_object_manager.py b/irods/manager/data_object_manager.py index 9ceaee8..709bd69 100644 --- a/irods/manager/data_object_manager.py +++ b/irods/manager/data_object_manager.py @@ -4,14 +4,25 @@ from irods.models import DataObject from irods.manager import Manager from irods.message import ( - iRODSMessage, FileOpenRequest, ObjCopyRequest, StringStringMap, DataObjInfo, ModDataObjMeta) + iRODSMessage, FileOpenRequest, ObjCopyRequest, StringStringMap, DataObjInfo, ModDataObjMeta, + DataObjChksumRequest, DataObjChksumResponse) import irods.exception as ex from irods.api_number import api_number from irods.data_object import ( iRODSDataObject, iRODSDataObjectFileRaw, chunks, irods_dirname, irods_basename) import irods.keywords as kw +import irods.parallel as parallel +import six +import ast +MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE = 32 * ( 1024 ** 2) + +DEFAULT_NUMBER_OF_THREADS = 0 # Defaults for reasonable number of threads -- optimized to be + # performant but allow no more worker threads than available CPUs. + # Setting this to 1 disables automatic use of parallel transfer. 
+DEFAULT_QUEUE_DEPTH = 32 + class DataObjectManager(Manager): READ_BUFFER_SIZE = 1024 * io.DEFAULT_BUFFER_SIZE @@ -26,58 +37,138 @@ class DataObjectManager(Manager): O_EXCL = 128 O_TRUNC = 512 - def _download(self, obj, local_path, **options): + + def should_parallelize_transfer( self, + num_threads = 0, + obj_sz = 1+MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE, + server_version_hint = ()): + # Allow an environment variable to override the detection of the server version. + # Example: $ export IRODS_VERSION_OVERRIDE="4,2,9" ; python -m irods.parallel ... + server_version = ( ast.literal_eval(os.environ.get('IRODS_VERSION_OVERRIDE', '()' )) or server_version_hint or + self.server_version ) + if num_threads == 1 or ( server_version < parallel.MINIMUM_SERVER_VERSION ): + return False + if getattr(obj_sz,'seek',None) : + pos = obj_sz.tell() + size = obj_sz.seek(0,os.SEEK_END) + if not isinstance(size,six.integer_types): + size = obj_sz.tell() + obj_sz.seek(pos,os.SEEK_SET) + else: + size = obj_sz + assert (size > -1) + return size > MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE + + + def _download(self, obj, local_path, num_threads, **options): + if os.path.isdir(local_path): - file = os.path.join(local_path, irods_basename(obj)) + local_file = os.path.join(local_path, irods_basename(obj)) else: - file = local_path + local_file = local_path - # Check for force flag if file exists - if os.path.exists(file) and kw.FORCE_FLAG_KW not in options: + # Check for force flag if local_file exists + if os.path.exists(local_file) and kw.FORCE_FLAG_KW not in options: raise ex.OVERWRITE_WITHOUT_FORCE_FLAG - with open(file, 'wb') as f, self.open(obj, 'r', **options) as o: - for chunk in chunks(o, self.READ_BUFFER_SIZE): - f.write(chunk) + with open(local_file, 'wb') as f, self.open(obj, 'r', **options) as o: + + if self.should_parallelize_transfer (num_threads, o): + f.close() + if not self.parallel_get( (obj,o), local_path, num_threads = num_threads, + target_resource_name = 
options.get(kw.RESC_NAME_KW,'')): + raise RuntimeError("parallel get failed") + else: + for chunk in chunks(o, self.READ_BUFFER_SIZE): + f.write(chunk) - def get(self, path, file=None, **options): + def get(self, path, local_path = None, num_threads = DEFAULT_NUMBER_OF_THREADS, **options): parent = self.sess.collections.get(irods_dirname(path)) # TODO: optimize - if file: - self._download(path, file, **options) + if local_path: + self._download(path, local_path, num_threads = num_threads, **options) query = self.sess.query(DataObject)\ .filter(DataObject.name == irods_basename(path))\ .filter(DataObject.collection_id == parent.id)\ .add_keyword(kw.ZONE_KW, path.split('/')[1]) + results = query.all() # get up to max_rows replicas if len(results) <= 0: raise ex.DataObjectDoesNotExist() return iRODSDataObject(self, parent, results) - def put(self, file, irods_path, **options): + def put(self, local_path, irods_path, return_data_object = False, num_threads = DEFAULT_NUMBER_OF_THREADS, **options): + if irods_path.endswith('/'): - obj = irods_path + os.path.basename(file) + obj = irods_path + os.path.basename(local_path) else: obj = irods_path - # Set operation type to trigger acPostProcForPut - if kw.OPR_TYPE_KW not in options: - options[kw.OPR_TYPE_KW] = 1 # PUT_OPR - - with open(file, 'rb') as f, self.open(obj, 'w', **options) as o: - for chunk in chunks(f, self.WRITE_BUFFER_SIZE): - o.write(chunk) + with open(local_path, 'rb') as f, self.open(obj, 'w', **options) as o: + + if self.should_parallelize_transfer (num_threads, f): + f.close() + if not self.parallel_put( local_path, (obj,o), num_threads = num_threads, + target_resource_name = options.get(kw.RESC_NAME_KW,'') or + options.get(kw.DEST_RESC_NAME_KW,'')): + raise RuntimeError("parallel put failed") + else: + # Set operation type to trigger acPostProcForPut + if kw.OPR_TYPE_KW not in options: + options[kw.OPR_TYPE_KW] = 1 # PUT_OPR + for chunk in chunks(f, self.WRITE_BUFFER_SIZE): + o.write(chunk) if kw.ALL_KW 
in options: options[kw.UPDATE_REPL_KW] = '' self.replicate(obj, **options) + if return_data_object: + return self.get(obj) + + def chksum(self, path, **options): + message_body = DataObjChksumRequest(path, **options) + message = iRODSMessage('RODS_API_REQ', msg=message_body, + int_info=api_number['DATA_OBJ_CHKSUM_AN']) + checksum = None + with self.sess.pool.get_connection() as conn: + conn.send(message) + response = conn.recv() + results = response.get_main_message(DataObjChksumResponse) + checksum = results.myStr + return checksum + + + def parallel_get(self, + data_or_path_ , + file_ , + async_ = False, + num_threads = 0, + target_resource_name = '', + progressQueue = False): + + return parallel.io_main( self.sess, data_or_path_, parallel.Oper.GET | (parallel.Oper.NONBLOCKING if async_ else 0), file_, + num_threads = num_threads, target_resource_name = target_resource_name, + queueLength = (DEFAULT_QUEUE_DEPTH if progressQueue else 0)) + + def parallel_put(self, + file_ , + data_or_path_ , + async_ = False, + num_threads = 0, + target_resource_name = '', + progressQueue = False): - def create(self, path, resource=None, **options): + return parallel.io_main( self.sess, data_or_path_, parallel.Oper.PUT | (parallel.Oper.NONBLOCKING if async_ else 0), file_, + num_threads = num_threads, target_resource_name = target_resource_name, + queueLength = (DEFAULT_QUEUE_DEPTH if progressQueue else 0)) + + + def create(self, path, resource=None, force=False, **options): options[kw.DATA_TYPE_KW] = 'generic' if resource: @@ -89,6 +180,9 @@ def create(self, path, resource=None, **options): except AttributeError: pass + if force: + options[kw.FORCE_FLAG_KW] = '' + message_body = FileOpenRequest( objPath=path, createMode=0o644, @@ -111,21 +205,27 @@ def create(self, path, resource=None, **options): return self.get(path) - def open(self, path, mode, **options): + def open_with_FileRaw(self, *arg, **kw_options): + holder = [] + handle = 
self.open(*arg,_raw_fd_holder=holder,**kw_options) + return (handle, holder[-1]) + + def open(self, path, mode, create = True, finalize_on_close = True, **options): + _raw_fd_holder = options.get('_raw_fd_holder',[]) if kw.DEST_RESC_NAME_KW not in options: # Use client-side default resource if available try: options[kw.DEST_RESC_NAME_KW] = self.sess.default_resource except AttributeError: pass - + createFlag = self.O_CREAT if create else 0 flags, seek_to_end = { 'r': (self.O_RDONLY, False), 'r+': (self.O_RDWR, False), - 'w': (self.O_WRONLY | self.O_CREAT | self.O_TRUNC, False), - 'w+': (self.O_RDWR | self.O_CREAT | self.O_TRUNC, False), - 'a': (self.O_WRONLY | self.O_CREAT, True), - 'a+': (self.O_RDWR | self.O_CREAT, True), + 'w': (self.O_WRONLY | createFlag | self.O_TRUNC, False), + 'w+': (self.O_RDWR | createFlag | self.O_TRUNC, False), + 'a': (self.O_WRONLY | createFlag, True), + 'a+': (self.O_RDWR | createFlag, True), }[mode] # TODO: Use seek_to_end @@ -151,7 +251,9 @@ def open(self, path, mode, **options): conn.send(message) desc = conn.recv().int_info - return io.BufferedRandom(iRODSDataObjectFileRaw(conn, desc, **options)) + raw = iRODSDataObjectFileRaw(conn, desc, finalize_on_close = finalize_on_close, **options) + (_raw_fd_holder).append(raw) + return io.BufferedRandom(raw) def unlink(self, path, force=False, **options): diff --git a/irods/manager/metadata_manager.py b/irods/manager/metadata_manager.py index 338e5d6..6f4fe6e 100644 --- a/irods/manager/metadata_manager.py +++ b/irods/manager/metadata_manager.py @@ -1,17 +1,22 @@ +from __future__ import print_function from __future__ import absolute_import import logging from os.path import dirname, basename from irods.manager import Manager -from irods.message import MetadataRequest, iRODSMessage +from irods.message import MetadataRequest, iRODSMessage, JSON_Message from irods.api_number import api_number from irods.models import (DataObject, Collection, Resource, User, DataObjectMeta, CollectionMeta, 
ResourceMeta, UserMeta) -from irods.meta import iRODSMeta +from irods.meta import iRODSMeta, AVUOperation + logger = logging.getLogger(__name__) +class InvalidAtomicAVURequest(Exception): pass + + class MetadataManager(Manager): @staticmethod @@ -23,6 +28,15 @@ def _model_class_to_resource_type(model_cls): User: 'u', }[model_cls] + @staticmethod + def _model_class_to_resource_description(model_cls): + return { + DataObject: 'data_object', + Collection: 'collection', + Resource: 'resource', + User: 'user', + }[model_cls] + def get(self, model_cls, path): resource_type = self._model_class_to_resource_type(model_cls) model = { @@ -121,3 +135,33 @@ def set(self, model_cls, path, meta): conn.send(request) response = conn.recv() logger.debug(response.int_info) + + @staticmethod + def _avu_operation_to_dict( op ): + opJSON = { "operation": op.operation, + "attribute": op.avu.name, + "value": op.avu.value + } + if op.avu.units not in ("",None): + opJSON["units"] = op.avu.units + return opJSON + + def apply_atomic_operations(self, model_cls, path, *avu_ops): + if not all(isinstance(op,AVUOperation) for op in avu_ops): + raise InvalidAtomicAVURequest("avu_ops must contain 1 or more AVUOperations") + request = { + "entity_name": path, + "entity_type": self._model_class_to_resource_description(model_cls), + "operations" : [self._avu_operation_to_dict(op) for op in avu_ops] + } + self._call_atomic_metadata_api(request) + + def _call_atomic_metadata_api(self, request_text): + with self.sess.pool.get_connection() as conn: + request_msg = iRODSMessage("RODS_API_REQ", JSON_Message( request_text, conn.server_version ), + int_info=api_number['ATOMIC_APPLY_METADATA_OPERATIONS_APN']) + conn.send( request_msg ) + response = conn.recv() + response_msg = response.get_json_encoded_struct() + logger.debug("in atomic_metadata, server responded with: %r",response_msg) + diff --git a/irods/manager/user_manager.py b/irods/manager/user_manager.py index 3bdc1b4..01e7b41 100644 --- 
a/irods/manager/user_manager.py +++ b/irods/manager/user_manager.py @@ -1,15 +1,10 @@ from __future__ import absolute_import import logging -import six -if six.PY3: - from html import escape -else: - from cgi import escape from irods.models import User, UserGroup from irods.manager import Manager -from irods.message import GeneralAdminRequest, iRODSMessage -from irods.exception import UserDoesNotExist, UserGroupDoesNotExist, NoResultFound +from irods.message import GeneralAdminRequest, iRODSMessage, GetTempPasswordForOtherRequest, GetTempPasswordForOtherOut +from irods.exception import UserDoesNotExist, UserGroupDoesNotExist, NoResultFound, CAT_SQL_ERR from irods.api_number import api_number from irods.user import iRODSUser, iRODSUserGroup import irods.password_obfuscation as obf @@ -35,7 +30,8 @@ def create(self, user_name, user_type, user_zone="", auth_str=""): message_body = GeneralAdminRequest( "add", "user", - user_name, + user_name if not user_zone or user_zone == self.sess.zone \ + else "{}#{}".format(user_name,user_zone), user_type, user_zone, auth_str @@ -62,6 +58,30 @@ def remove(self, user_name, user_zone=""): response = conn.recv() logger.debug(response.int_info) + def temp_password_for_user(self, user_name): + with self.sess.pool.get_connection() as conn: + message_body = GetTempPasswordForOtherRequest( + targetUser=user_name, + unused=None + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GET_TEMP_PASSWORD_FOR_OTHER_AN']) + + # Send request + conn.send(request) + + # Receive answer + try: + response = conn.recv() + logger.debug(response.int_info) + except CAT_SQL_ERR: + raise UserDoesNotExist() + + # Convert and return answer + msg = response.get_main_message(GetTempPasswordForOtherOut) + return obf.create_temp_password(msg.stringToHashWith, conn.account.password) + + def modify(self, user_name, option, new_value, user_zone=""): # must append zone to username for this API call @@ -75,11 +95,6 @@ def modify(self, 
user_name, option, new_value, user_zone=""): current_password = self.sess.pool.account.password new_value = obf.obfuscate_new_password(new_value, current_password, conn.client_signature) - # html style escaping might have to be generalized: - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1913 - # https://github.com/irods/irods/blob/4.2.1/lib/core/src/packStruct.cpp#L1331-L1368 - new_value = escape(new_value, quote=False) - message_body = GeneralAdminRequest( "modify", "user", diff --git a/irods/manager/zone_manager.py b/irods/manager/zone_manager.py new file mode 100644 index 0000000..f6416c2 --- /dev/null +++ b/irods/manager/zone_manager.py @@ -0,0 +1,50 @@ +from __future__ import absolute_import +import logging + +from irods.models import Zone +from irods.zone import iRODSZone +from irods.manager import Manager +from irods.message import GeneralAdminRequest, iRODSMessage +from irods.api_number import api_number +from irods.exception import ZoneDoesNotExist, NoResultFound + +logger = logging.getLogger(__name__) + +class ZoneManager(Manager): + + def get(self, zone_name): + query = self.sess.query(Zone).filter(Zone.name == zone_name) + + try: + result = query.one() + except NoResultFound: + raise ZoneDoesNotExist() + return iRODSZone(self, result) + + def create(self, zone_name, zone_type): + message_body = GeneralAdminRequest( + "add", + "zone", + zone_name, + zone_type, + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + logger.debug(response.int_info) + return self.get(zone_name) + + def remove(self, zone_name): + message_body = GeneralAdminRequest( + "rm", + "zone", + zone_name + ) + request = iRODSMessage("RODS_API_REQ", msg=message_body, + int_info=api_number['GENERAL_ADMIN_AN']) + with self.sess.pool.get_connection() as conn: + conn.send(request) + response = conn.recv() + 
logger.debug(response.int_info) diff --git a/irods/message/__init__.py b/irods/message/__init__.py index 2bc7d31..96f749a 100644 --- a/irods/message/__init__.py +++ b/irods/message/__init__.py @@ -1,6 +1,9 @@ +"""Define objects related to communication with iRODS server API endpoints.""" + import struct import logging import socket +import json import xml.etree.ElementTree as ET from irods.message.message import Message from irods.message.property import (BinaryProperty, StringProperty, @@ -58,6 +61,15 @@ def _recv_message_into(sock, buffer, size): index += rsize return mv[:index] +#------------------------------------ + +class BinBytesBuf(Message): + _name = 'BinBytesBuf_PI' + buflen = IntegerProperty() + buf = BinaryProperty() + +class JSON_Binary_Response(BinBytesBuf): + pass class iRODSMessage(object): @@ -68,6 +80,15 @@ def __init__(self, msg_type=b'', msg=None, error=b'', bs=b'', int_info=0): self.bs = bs self.int_info = int_info + def get_json_encoded_struct (self): + Xml = ET.fromstring(self.msg.replace(b'\0',b'')) + json_str = Xml.find('buf').text + if Xml.tag == 'BinBytesBuf_PI': + mybin = JSON_Binary_Response() + mybin.unpack(Xml) + json_str = mybin.buf.replace(b'\0',b'').decode() + return json.loads( json_str ) + @staticmethod def recv(sock): # rsp_header_size = sock.recv(4, socket.MSG_WAITALL) @@ -188,7 +209,7 @@ class ClientServerNegotiation(Message): class StartupPack(Message): _name = 'StartupPack_PI' - def __init__(self, proxy_user, client_user): + def __init__(self, proxy_user, client_user, application_name = ''): super(StartupPack, self).__init__() if proxy_user and client_user: self.irodsProt = 1 @@ -197,7 +218,7 @@ def __init__(self, proxy_user, client_user): self.clientUser, self.clientRcatZone = client_user self.relVersion = "rods{}.{}.{}".format(*IRODS_VERSION) self.apiVersion = "{3}".format(*IRODS_VERSION) - self.option = "" + self.option = application_name irodsProt = IntegerProperty() reconnFlag = IntegerProperty() @@ -223,16 +244,52 @@ 
class AuthChallenge(Message): _name = 'authRequestOut_PI' challenge = BinaryProperty(64) + +class AuthPluginOut(Message): + _name = 'authPlugReqOut_PI' + result_ = StringProperty() + # result_ = BinaryProperty(16) + # define InxIvalPair_PI "int iiLen; int *inx(iiLen); int *ivalue(iiLen);" +class JSON_Binary_Request(BinBytesBuf): -class BinBytesBuf(Message): - _name = 'BinBytesBuf_PI' + """A message body whose payload is BinBytesBuf containing JSON.""" + + def __init__(self,msg_struct): + """Initialize with a Python data structure that will be converted to JSON.""" + super(JSON_Binary_Request,self).__init__() + string = json.dumps(msg_struct) + self.buf = string + self.buflen = len(string) + +class BytesBuf(Message): + + """A generic structure carrying text content""" + + _name = 'BytesBuf_PI' buflen = IntegerProperty() - buf = BinaryProperty() + buf = StringProperty() + def __init__(self,string,*v,**kw): + super(BytesBuf,self).__init__(*v,**kw) + self.buf = string + self.buflen = len(self.buf) +class JSON_XMLFramed_Request(BytesBuf): -class GSIAuthMessage(Message): + """A message body whose payload is a BytesBuf containing JSON.""" + def __init__(self, msg_struct): + """Initialize with a Python data structure that will be converted to JSON.""" + s = json.dumps(msg_struct) + super(JSON_XMLFramed_Request,self).__init__(s) + +def JSON_Message( msg_struct , server_version = () ): + cls = JSON_XMLFramed_Request if server_version < (4,2,9) \ + else JSON_Binary_Request + return cls(msg_struct) + + +class PluginAuthMessage(Message): _name = 'authPlugReqInp_PI' auth_scheme_ = StringProperty() context_ = StringProperty() @@ -259,6 +316,21 @@ def __init__(self, list_of_keyval_tuples ): self._keys.append(k) self._values.append(v) +class _OrderedMultiMapping : + def keys(self): + return self._keys + def values(self): + return self._values + def __len__(self): + return len(self._keys) + def __init__(self, list_of_keyval_tuples ): + self._keys = [] + self._values = [] + for k,v 
in list_of_keyval_tuples: + self._keys.append(k) + self._values.append(v) + + class IntegerIntegerMap(Message): _name = 'InxIvalPair_PI' @@ -362,6 +434,22 @@ class FileOpenRequest(Message): oprType = IntegerProperty() KeyValPair_PI = SubmessageProperty(StringStringMap) +class DataObjChksumRequest(FileOpenRequest): + """Report and/or generate a data object's checksum.""" + + def __init__(self,path,**chksumOptions): + """Construct the request using the path of a data object.""" + super(DataObjChksumRequest,self).__init__() + for attr,prop in vars(FileOpenRequest).items(): + if isinstance(prop, (IntegerProperty,LongProperty)): + setattr(self, attr, 0) + self.objPath = path + self.KeyValPair_PI = StringStringMap(chksumOptions) + +class DataObjChksumResponse(Message): + name = 'Str_PI' + myStr = StringProperty() + # define OpenedDataObjInp_PI "int l1descInx; int len; int whence; int # oprType; double offset; double bytesWritten; struct KeyValPair_PI;" @@ -478,6 +566,22 @@ def __init__(self, *args): arg9 = StringProperty() +class GetTempPasswordForOtherRequest(Message): + _name = 'getTempPasswordForOtherInp_PI' + targetUser = StringProperty() + unused = StringProperty() + + +class GetTempPasswordForOtherOut(Message): + _name = 'getTempPasswordForOtherOut_PI' + stringToHashWith = StringProperty() + + +class GetTempPasswordOut(Message): + _name = 'getTempPasswordOut_PI' + stringToHashWith = StringProperty() + + #define ticketAdminInp_PI "str *arg1; str *arg2; str *arg3; str *arg4; str *arg5; str *arg6;" class TicketAdminRequest(Message): diff --git a/irods/message/property.py b/irods/message/property.py index 443d371..b4c250e 100644 --- a/irods/message/property.py +++ b/irods/message/property.py @@ -3,7 +3,10 @@ from irods.message.ordered import OrderedProperty import six - +if six.PY3: + from html import escape +else: + from cgi import escape class MessageProperty(OrderedProperty): @@ -82,24 +85,27 @@ def __init__(self, length=None): self.length = length 
super(StringProperty, self).__init__() + @staticmethod + def escape_xml_string(string): + return escape(string, quote=False) if six.PY2: def format(self, value): if isinstance(value, str) or isinstance(value, unicode): - return value + return self.escape_xml_string(value) - return str(value) + return self.escape_xml_string(str(value)) else: # Python 3 def format(self, value): if isinstance(value, str): - return value + return self.escape_xml_string(value) if isinstance(value, bytes): - return value.decode() + return self.escape_xml_string(value.decode()) - return str(value) + return self.escape_xml_string(str(value)) def parse(self, value): diff --git a/irods/meta.py b/irods/meta.py index 4137ac0..ad16eb1 100644 --- a/irods/meta.py +++ b/irods/meta.py @@ -1,3 +1,5 @@ + + class iRODSMeta(object): def __init__(self, name, value, units=None, avu_id=None): @@ -10,6 +12,56 @@ def __repr__(self): return "".format(**vars(self)) +class BadAVUOperationKeyword(Exception): pass + +class BadAVUOperationValue(Exception): pass + + +class AVUOperation(dict): + + @property + def operation(self): + return self['operation'] + + @operation.setter + def operation(self,Oper): + self._check_operation(Oper) + self['operation'] = Oper + + @property + def avu(self): + return self['avu'] + + @avu.setter + def avu(self,newAVU): + self._check_avu(newAVU) + self['avu'] = newAVU + + def _check_avu(self,avu_param): + if not isinstance(avu_param, iRODSMeta): + error_msg = "Nonconforming avu {!r} of type {}; must be an iRODSMeta." 
\ + "".format(avu_param,type(avu_param).__name__) + raise BadAVUOperationValue(error_msg) + + def _check_operation(self,operation): + if operation not in ('add','remove'): + error_msg = "Nonconforming operation {!r}; must be 'add' or 'remove'.".format(operation) + raise BadAVUOperationValue(error_msg) + + def __init__(self, operation, avu, **kw): + """Constructor: + AVUOperation( operation = opstr, # where opstr is "add" or "remove" + avu = metadata ) # where metadata is an irods.meta.iRODSMeta instance + """ + super(AVUOperation,self).__init__() + self._check_operation (operation) + self._check_avu (avu) + if kw: + raise BadAVUOperationKeyword('''Nonconforming keyword (s) {}.'''.format(list(kw.keys()))) + for atr in ('operation','avu'): + setattr(self,atr,locals()[atr]) + + class iRODSMetaCollection(object): def __init__(self, manager, model_cls, path): @@ -47,6 +99,10 @@ def _get_meta(self, *args): "Must specify an iRODSMeta object or key, value, units)") return args[0] if len(args) == 1 else iRODSMeta(*args) + def apply_atomic_operations(self, *avu_ops): + self._manager.apply_atomic_operations(self._model_cls, self._path, *avu_ops) + self._reset_metadata() + def add(self, *args): """ Add as iRODSMeta to a key diff --git a/irods/models.py b/irods/models.py index 08c53e7..71d642a 100644 --- a/irods/models.py +++ b/irods/models.py @@ -19,9 +19,30 @@ class Model(six.with_metaclass(ModelBase, object)): pass +class RuleExec(Model): + id = Column(Integer, 'RULE_EXEC_ID', 1000) + name = Column(String, 'RULE_EXEC_NAME', 1001) + rei_file_path = Column(String,'RULE_EXEC_REI_FILE_PATH', 1002) + user_name = Column(String, 'RULE_EXEC_USER_NAME', 1003) + time = Column(DateTime,'RULE_EXEC_TIME', 1005) + last_exe_time = Column(DateTime,'RULE_EXEC_LAST_EXE_TIME', 1010) + frequency = Column(String,'RULE_EXEC_FREQUENCY', 1006) + priority = Column(String, 'RULE_EXEC_PRIORITY', 1007) + +# # If needed in 4.2.9, we can update the Query class to dynamically +# # attach this field based 
on server version: +# context = Column(String, 'RULE_EXEC_CONTEXT', 1012) + +# # These are either unused or usually absent: +# exec_status = Column(String,'RULE_EXEC_STATUS', 1011) +# address = Column(String,'RULE_EXEC_ADDRESS', 1004) +# notification_addr = Column('RULE_EXEC_NOTIFICATION_ADDR', 1009) + + class Zone(Model): id = Column(Integer, 'ZONE_ID', 101) name = Column(String, 'ZONE_NAME', 102) + type = Column(String, 'ZONE_TYPE', 103) class User(Model): @@ -112,6 +133,8 @@ class DataObjectMeta(Model): name = Column(String, 'COL_META_DATA_ATTR_NAME', 600) value = Column(String, 'COL_META_DATA_ATTR_VALUE', 601) units = Column(String, 'COL_META_DATA_ATTR_UNITS', 602) + create_time = Column(DateTime, 'COL_META_DATA_CREATE_TIME', 604) + modify_time = Column(DateTime, 'COL_META_DATA_MODIFY_TIME', 605) class CollectionMeta(Model): @@ -119,6 +142,9 @@ class CollectionMeta(Model): name = Column(String, 'COL_META_COLL_ATTR_NAME', 610) value = Column(String, 'COL_META_COLL_ATTR_VALUE', 611) units = Column(String, 'COL_META_COLL_ATTR_UNITS', 612) + create_time = Column(DateTime, 'COL_META_COLL_CREATE_TIME', 614) + modify_time = Column(DateTime, 'COL_META_COLL_MODIFY_TIME', 615) + class ResourceMeta(Model): @@ -126,6 +152,9 @@ class ResourceMeta(Model): name = Column(String, 'COL_META_RESC_ATTR_NAME', 630) value = Column(String, 'COL_META_RESC_ATTR_VALUE', 631) units = Column(String, 'COL_META_RESC_ATTR_UNITS', 632) + create_time = Column(DateTime, 'COL_META_RESC_CREATE_TIME', 634) + modify_time = Column(DateTime, 'COL_META_RESC_MODIFY_TIME', 635) + class UserMeta(Model): @@ -133,6 +162,9 @@ class UserMeta(Model): name = Column(String, 'COL_META_USER_ATTR_NAME', 640) value = Column(String, 'COL_META_USER_ATTR_VALUE', 641) units = Column(String, 'COL_META_USER_ATTR_UNITS', 642) + create_time = Column(DateTime, 'COL_META_USER_CREATE_TIME', 644) + modify_time = Column(DateTime, 'COL_META_USER_MODIFY_TIME', 645) + class DataAccess(Model): diff --git a/irods/parallel.py 
b/irods/parallel.py new file mode 100644 index 0000000..52843dc --- /dev/null +++ b/irods/parallel.py @@ -0,0 +1,498 @@ +#!/usr/bin/env python +from __future__ import print_function + +import os +import ssl +import time +import sys +import logging +import contextlib +import concurrent.futures +import threading +import multiprocessing +import six + +from irods.data_object import iRODSDataObject +from irods.exception import DataObjectDoesNotExist +import irods.keywords as kw +from six.moves.queue import Queue,Full,Empty + + +logger = logging.getLogger( __name__ ) +_nullh = logging.NullHandler() +logger.addHandler( _nullh ) + + +MINIMUM_SERVER_VERSION = (4,2,9) + + +try: + from threading import Barrier # Use 'Barrier' class if included (as in Python >= 3.2) ... +except ImportError: # ... but otherwise, use this ad hoc: + class Barrier(object): + def __init__(self, n): + """Initialize a Barrier to wait on n threads.""" + self.n = n + self.count = 0 + self.mutex = threading.Semaphore(1) + self.barrier = threading.Semaphore(0) + def wait(self): + """Per-thread wait function. + + As in Python3.2 threading, returns 0 <= wait_serial_int < n + """ + self.mutex.acquire() + self.count += 1 + count = self.count + self.mutex.release() + if count == self.n: self.barrier.release() + self.barrier.acquire() + self.barrier.release() + return count - 1 + +@contextlib.contextmanager +def enableLogging(handlerType,args,level_ = logging.INFO): + """Context manager for temporarily enabling a logger. For debug or test. 
+ + Usage Example - + with irods.parallel.enableLogging(logging.FileHandler,('/tmp/logfile.txt',)): + # parallel put/get code here + """ + h = None + saveLevel = logger.level + try: + logger.setLevel(level_) + h = handlerType(*args) + h.setLevel( level_ ) + logger.addHandler(h) + yield + finally: + logger.setLevel(saveLevel) + if h in logger.handlers: + logger.removeHandler(h) + + +RECOMMENDED_NUM_THREADS_PER_TRANSFER = 3 + +verboseConnection = False + +class BadCallbackTarget(TypeError): pass + +class AsyncNotify (object): + + """A type returned when the PUT or GET operation passed includes NONBLOCKING. + If enabled, the callback function (or callable object) will be triggered + when all parts of the parallel transfer are complete. It should accept + exactly one argument, the irods.parallel.AsyncNotify instance that + is calling it. + """ + + def set_transfer_done_callback( self, callback ): + if callback is not None: + if not callable(callback): + raise BadCallbackTarget( '"callback" must be a callable accepting at least 1 argument' ) + self.done_callback = callback + + def __init__(self, futuresList, callback = None, progress_Queue = None, total = None, keep_ = ()): + """AsyncNotify initialization (used internally to the io.parallel library). + The casual user will only be concerned with the callback parameter, called when all threads + of the parallel PUT or GET have been terminated and the data object closed. 
+ """ + self._futures = set(futuresList) + self._futures_done = dict() + self.keep = dict(keep_) + self._lock = threading.Lock() + self.set_transfer_done_callback (callback) + self.__done = False + if self._futures: + for future in self._futures: future.add_done_callback( self ) + else: + self.__invoke_done_callback() + + self.progress = [0, 0] + if (progress_Queue) and (total is not None): + self.progress[1] = total + def _progress(Q,this): # - thread to update progress indicator + while this.progress[0] < this.progress[1]: + i = None + try: + i = Q.get(timeout=0.1) + except Empty: + pass + if i is not None: + if isinstance(i,six.integer_types) and i >= 0: this.progress[0] += i + else: break + self._progress_fn = _progress + self._progress_thread = threading.Thread( target = self._progress_fn, args = (progress_Queue, self)) + self._progress_thread.start() + + @staticmethod + def asciiBar( lst, memo = [1] ): + memo[0] += 1 + spinner = "|/-\\"[memo[0]%4] + percent = "%5.1f%%"%(lst[0]*100.0/lst[1]) + mbytes = "%9.1f MB / %9.1f MB"%(lst[0]/1e6,lst[1]/1e6) + if lst[1] != 0: + s = " {spinner} {percent} [ {mbytes} ] " + else: + s = " {spinner} " + return s.format(**locals()) + + def wait_until_transfer_done (self, timeout=float('inf'), progressBar = False): + carriageReturn = '\r' + begin = t = time.time() + end = begin + timeout + while not self.__done: + time.sleep(min(0.1, max(0.0, end - t))) + t = time.time() + if t >= end: break + if progressBar: + print (' ' + self.asciiBar( self.progress ) + carriageReturn, end='', file=sys.stderr) + sys.stderr.flush() + return self.__done + + def __call__(self,future): # Our instance is called by each future (individual file part) when done. + # When all futures are done, we invoke the configured callback. 
+ with self._lock: + self._futures_done[future] = future.result() + if len(self._futures) == len(self._futures_done): self.__invoke_done_callback() + + def __invoke_done_callback(self): + try: + if callable(self.done_callback): self.done_callback(self) + finally: + self.keep.pop('mgr',None) + self.__done = True + self.set_transfer_done_callback(None) + + @property + def futures(self): return list(self._futures) + + @property + def futures_done(self): return dict(self._futures_done) + + +class Oper(object): + """A custom enum-type class with utility methods. """ + + GET = 0 + PUT = 1 + NONBLOCKING = 2 + + def __int__(self): + """Return the stored flags as an integer bitmask. """ + return self._opr + + def __init__(self, rhs): + """Initialize with a bit mask of flags ie. whether Operation PUT or GET, + and whether NONBLOCKING.""" + self._opr = int(rhs) + + def isPut(self): return 0 != (self._opr & self.PUT) + def isGet(self): return not self.isPut() + def isNonBlocking(self): return 0 != (self._opr & self.NONBLOCKING) + + def data_object_mode(self, initial_open = False): + if self.isPut(): + return 'w' if initial_open else 'a' + else: + return 'r' + + def disk_file_mode(self, initial_open = False, binary = True): + if self.isPut(): + mode = 'r' + else: + mode = 'w' if initial_open else 'r+' + return ((mode + 'b') if binary else mode) + + +def _io_send_bytes_progress (queueObject, item): + try: + queueObject.put(item) + return True + except Full: + return False + +COPY_BUF_SIZE = (1024 ** 2) * 4 + +def _copy_part( src, dst, length, queueObject, debug_info, mgr): + + bytecount = 0 + accum = 0 + while True and bytecount < length: + buf = src.read(min(COPY_BUF_SIZE, length - bytecount)) + buf_len = len(buf) + if 0 == buf_len: break + dst.write(buf) + bytecount += buf_len + accum += buf_len + if queueObject and accum and _io_send_bytes_progress(queueObject,accum): accum = 0 + if verboseConnection: + print ("("+debug_info+")",end='',file=sys.stderr) + sys.stderr.flush() + 
+ # In a put or get, exactly one of (src,dst) is a file. Find which and close that one first. + (file_,obj_) = (src,dst) if dst in mgr else (dst,src) + file_.close() + mgr.remove_io( obj_ ) # 1. closes obj if it is not the mgr's initial descriptor + # 2. blocks at barrier until all transfer threads are done copying + # 3. closes with finalize if obj is mgr's initial descriptor + return bytecount + + +class _Multipart_close_manager: + + def __init__(self, initial_io_, exit_barrier_): + self.exit_barrier = exit_barrier_ + self.initial_io = initial_io_ + self.__lock = threading.Lock() + self.aux = [] + + def __contains__(self,Io): + with self.__lock: + return Io is self.initial_io or \ + Io in self.aux + + # `add_io' - add an i/o object to be managed + # note: `remove_io' should only be called for managed i/o objects + + def add_io(self,Io): + with self.__lock: + if Io is not self.initial_io: + self.aux.append(Io) + + # `remove_io' is for closing a channel of parallel i/o and allowing the + # data object to flush write operations (if any) in a timely fashion. 
It also + # synchronizes all of the parallel threads just before exit, so that we know + # exactly when to perform a finalizing close on the data object + + def remove_io(self,Io): + is_initial = True + with self.__lock: + if Io is not self.initial_io: + Io.close() + self.aux.remove(Io) + is_initial = False + self.exit_barrier.wait() + if is_initial: self.finalize() + + def finalize(self): + self.initial_io.close() + + +def _io_part (objHandle, range_, file_, opr_, mgr_, thread_debug_id = '', queueObject = None ): + if 0 == len(range_): return 0 + Operation = Oper(opr_) + (offset,length) = (range_[0], len(range_)) + objHandle.seek(offset) + file_.seek(offset) + if thread_debug_id == '': + thread_debug_id = str(threading.currentThread().ident) + return ( _copy_part (file_, objHandle, length, queueObject, thread_debug_id, mgr_) if Operation.isPut() + else _copy_part (objHandle, file_, length, queueObject, thread_debug_id, mgr_) ) + + +def _io_multipart_threaded(operation_ , dataObj_and_IO, replica_token, hier_str, session, fname, + total_size, num_threads = 0, **extra_options): + """Called by _io_main. 
+ Carve up (0,total_size) range into `num_threads` parts and initiate a transfer thread for each one.""" + (D, Io) = dataObj_and_IO + Operation = Oper( operation_ ) + + if num_threads < 1: + num_threads = RECOMMENDED_NUM_THREADS_PER_TRANSFER + num_threads = max(1, min(multiprocessing.cpu_count(), num_threads)) + + P = 1 + (total_size // num_threads) + logger.info("num_threads = %s ; (P)artitionSize = %s", num_threads, P) + ranges = [six.moves.range(i*P,min(i*P+P,total_size)) for i in range(num_threads) if i*P < total_size] + + _queueLength = extra_options.get('_queueLength',0) + if _queueLength > 0: + queueObject = Queue(_queueLength) + else: + queueObject = None + + futures = [] + executor = concurrent.futures.ThreadPoolExecutor(max_workers = num_threads) + num_threads = min(num_threads, len(ranges)) + mgr = _Multipart_close_manager(Io, Barrier(num_threads)) + counter = 1 + gen_file_handle = lambda: open(fname, Operation.disk_file_mode(initial_open = (counter == 1))) + File = gen_file_handle() + for r in ranges: + if Io is None: + Io = session.data_objects.open( D.path, Operation.data_object_mode(initial_open = False), + create = False, finalize_on_close = False, + **{kw.RESC_HIER_STR_KW: hier_str, kw.REPLICA_TOKEN_KW: replica_token} ) + mgr.add_io( Io ) + if File is None: File = gen_file_handle() + futures.append(executor.submit( _io_part, Io, r, File, Operation, mgr, str(counter), queueObject)) + counter += 1 + Io = File = None + + if Operation.isNonBlocking(): + if _queueLength: + return futures, queueObject, mgr + else: + return futures + else: + bytecounts = [ f.result() for f in futures ] + return sum(bytecounts), total_size + + +# _io_main +# * Entry point for parallel transfers (multithreaded PUT and GET operations) +# * determine replica information +# * call multithread manager + +def io_main( session, Data, opr_, fname, R='', **kwopt): + + Operation = Oper(opr_) + d_path = None + Io = None + if isinstance(Data,tuple): + (Data, Io) = Data[:2] + if 
isinstance (Data, six.string_types): + d_path = Data + try: + Data = session.data_objects.get( Data ) + d_path = Data.path + except DataObjectDoesNotExist: + if Operation.isGet(): raise + + R_via_libcall = kwopt.pop( 'target_resource_name', '') + if R_via_libcall: + R = R_via_libcall + + resc_options = {} + if Operation.isPut(): + if R: + resc_options [kw.RESC_NAME_KW] = R + resc_options [kw.DEST_RESC_NAME_KW] = R + + if (not Io): + (Io, rawfile) = session.data_objects.open_with_FileRaw( (d_path or Data.path), + Operation.data_object_mode(initial_open = True), + finalize_on_close = True, **resc_options ) + else: + rawfile = Io.raw + + # data object should now exist + if not isinstance(Data,iRODSDataObject): + Data = session.data_objects.get(d_path) + + if Operation.isGet(): + total_bytes = Io.seek(0,os.SEEK_END) + Io.seek(0,os.SEEK_SET) + else: + with open(fname, 'rb') as f: + f.seek(0,os.SEEK_END) + total_bytes = f.tell() + + (replica_token , resc_hier) = rawfile.replica_access_info() + + num_threads = kwopt.pop( 'num_threads', None) + + if num_threads is None: num_threads = int(kwopt.get('N','0')) + + queueLength = kwopt.get('queueLength',0) + retval = _io_multipart_threaded (Operation, (Data, Io), replica_token, resc_hier, session, fname, total_bytes, + num_threads = num_threads, + _queueLength = queueLength) + + # SessionObject.data_objects.parallel_{put,get} will return: + # - immediately with an AsyncNotify instance, if Oper.NONBLOCKING flag is used. + # - upon completion with a boolean completion status, otherwise. 
+ + if Operation.isNonBlocking(): + + if queueLength > 0: + (futures, chunk_notify_queue, mgr) = retval + else: + futures = retval + chunk_notify_queue = total_bytes = None + + return AsyncNotify( futures, # individual futures, one per transfer thread + progress_Queue = chunk_notify_queue, # for notifying the progress indicator thread + total = total_bytes, # total number of bytes for parallel transfer + keep_ = {'mgr': mgr} ) # an open raw i/o object needing to be persisted, if any + else: + (_bytes_transferred, _bytes_total) = retval + return (_bytes_transferred == _bytes_total) + +if __name__ == '__main__': + + import getopt + import atexit + from irods.session import iRODSSession + + def setupLoggingWithDateTimeHeader(name,level = logging.DEBUG): + if _nullh in logger.handlers: + logger.removeHandler(_nullh) + if name: + handler = logging.FileHandler(name) + else: + handler = logging.StreamHandler() + handler.setFormatter(logging.Formatter('%(asctime)-15s - %(message)s')) + logger.addHandler(handler) + logger.setLevel( level ) + + try: + env_file = os.environ['IRODS_ENVIRONMENT_FILE'] + except KeyError: + env_file = os.path.expanduser('~/.irods/irods_environment.json') + ssl_context = ssl.create_default_context(purpose=ssl.Purpose.SERVER_AUTH, cafile=None, capath=None, cadata=None) + ssl_settings = {'ssl_context': ssl_context} + sess = iRODSSession(irods_env_file=env_file, **ssl_settings) + atexit.register(lambda : sess.cleanup()) + + opt,arg = getopt.getopt( sys.argv[1:], 'vL:l:aR:N:') + + opts = dict(opt) + + logFilename = opts.pop('-L',None) # '' for console, non-empty for filesystem destination + logLevel = (logging.INFO if logFilename is None else logging.DEBUG) + logFilename = logFilename or opts.pop('-l',None) + + if logFilename is not None: + setupLoggingWithDateTimeHeader(logFilename, logLevel) + + verboseConnection = (opts.pop('-v',None) is not None) + + async_xfer = opts.pop('-a',None) + + kwarg = { k.lstrip('-'):v for k,v in opts.items() } + + 
arg[1] = Oper.PUT if arg[1].lower() in ('w','put','a') \ + else Oper.GET + if async_xfer is not None: + arg[1] |= Oper.NONBLOCKING + + ret = io_main(sess, *arg, **kwarg) # arg[0] = data object or path + # arg[1] = operation: or'd flags : [PUT|GET] NONBLOCKING + # arg[2] = file path on local filesystem + # kwarg['queueLength'] sets progress-queue length (0 if no progress indication needed) + # kwarg options 'N' (num threads) and 'R' (target resource name) are via command-line + # kwarg['num_threads'] (overrides 'N' when called as a library) + # kwarg['target_resource_name'] (overrides 'R' when called as a library) + if isinstance( ret, AsyncNotify ): + print('waiting on completion...',file=sys.stderr) + ret.set_transfer_done_callback(lambda r: print('Async transfer done for:',r,file=sys.stderr)) + done = ret.wait_until_transfer_done (timeout=10.0) # - or do other useful work here + if done: + bytes_transferred = sum(ret.futures_done.values()) + print ('Asynch transfer complete. Total bytes transferred:', bytes_transferred,file=sys.stderr) + else: + print ('Asynch transfer was not completed before timeout expired.',file=sys.stderr) + else: + print('Synchronous transfer {}'.format('succeeded' if ret else 'failed'),file=sys.stderr) + +# Note : This module requires concurrent.futures, included in Python3.x. +# On Python2.7, this dependency must be installed using 'pip install futures'. 
+# Demonstration : +# +# $ dd if=/dev/urandom bs=1k count=150000 of=$HOME/puttest +# $ time python -m irods.parallel -R demoResc -N 3 `ipwd`/test.dat put $HOME/puttest # add -v,-a for verbose, asynch +# $ time python -m irods.parallel -R demoResc -N 3 `ipwd`/test.dat get $HOME/gettest # add -v,-a for verbose, asynch +# $ diff puttest gettest diff --git a/irods/password_obfuscation.py b/irods/password_obfuscation.py index a6f3a0e..ef38550 100644 --- a/irods/password_obfuscation.py +++ b/irods/password_obfuscation.py @@ -275,3 +275,11 @@ def obfuscate_new_password(new, old, signature): new = new + padding[:lcopy] return scramble_v2(new, old, signature) + + +def create_temp_password(temp_hash, source_password): + password = (temp_hash + source_password).ljust(100, chr(0)) + password_md5 = hashlib.md5(password.encode('utf-8')) + + # Return hexdigest + return password_md5.hexdigest() diff --git a/irods/pool.py b/irods/pool.py index b6c0257..a113d9e 100644 --- a/irods/pool.py +++ b/irods/pool.py @@ -1,39 +1,98 @@ from __future__ import absolute_import +import datetime import logging import threading +import os from irods import DEFAULT_CONNECTION_TIMEOUT from irods.connection import Connection logger = logging.getLogger(__name__) +def attribute_from_return_value(attrname): + def deco(method): + def method_(self,*s,**kw): + ret = method(self,*s,**kw) + setattr(self,attrname,ret) + return ret + return method_ + return deco + +DEFAULT_APPLICATION_NAME = 'python-irodsclient' class Pool(object): currentAuth=None - def __init__(self, account, block_on_authURL=True): + + def __init__(self, account, application_name='', connection_refresh_time=-1, block_on_authURL=True): + ''' + Pool( account , application_name='' ) + Create an iRODS connection pool; 'account' is an irods.account.iRODSAccount instance and + 'application_name' specifies the application name as it should appear in an 'ips' listing. 
+ ''' + + self._thread_local = threading.local() self.account = account self.block_on_authURL=block_on_authURL self._lock = threading.RLock() self.active = set() self.idle = set() self.connection_timeout = DEFAULT_CONNECTION_TIMEOUT + self.application_name = ( os.environ.get('spOption','') or + application_name or + DEFAULT_APPLICATION_NAME ) + + if connection_refresh_time > 0: + self.refresh_connection = True + self.connection_refresh_time = connection_refresh_time + else: + self.refresh_connection = False + self.connection_refresh_time = None + + @property + def _conn(self): return getattr( self._thread_local, "_conn", None) + + @_conn.setter + def _conn(self, conn_): setattr( self._thread_local, "_conn", conn_) + @attribute_from_return_value("_conn") def get_connection(self): with self._lock: try: conn = self.idle.pop() + + curr_time = datetime.datetime.now() + # If 'refresh_connection' flag is True and the connection was + # created more than 'connection_refresh_time' seconds ago, + # release the connection (as its stale) and create a new one + if self.refresh_connection and (curr_time - conn.create_time).total_seconds() > self.connection_refresh_time: + logger.debug('Connection with id {} was created more than {} seconds ago. Releasing the connection and creating a new one.'.format(id(conn), self.connection_refresh_time)) + self.release_connection(conn, True) + conn = Connection(self, self.account) + logger.debug("Created new connection with id: {}".format(id(conn))) except KeyError: conn = Connection(self, self.account, block_on_authURL=self.block_on_authURL) + logger.debug("No connection found in idle set. 
Created a new connection with id: {}".format(id(conn))) + self.active.add(conn) + logger.debug("Adding connection with id {} to active set".format(id(conn))) + logger.debug('num active: {}'.format(len(self.active))) + logger.debug('num idle: {}'.format(len(self.idle))) return conn def release_connection(self, conn, destroy=False): with self._lock: if conn in self.active: self.active.remove(conn) + logger.debug("Removed connection with id: {} from active set".format(id(conn))) if not destroy: + # If 'refresh_connection' flag is True, update connection's 'last_used_time' + if self.refresh_connection: + conn.last_used_time = datetime.datetime.now() self.idle.add(conn) + logger.debug("Added connection with id: {} to idle set".format(id(conn))) elif conn in self.idle and destroy: + logger.debug("Destroyed connection with id: {}".format(id(conn))) self.idle.remove(conn) + logger.debug('num active: {}'.format(len(self.active))) logger.debug('num idle: {}'.format(len(self.idle))) diff --git a/irods/query.py b/irods/query.py index 2006711..0d9f7f4 100644 --- a/irods/query.py +++ b/irods/query.py @@ -70,7 +70,7 @@ def filter(self, *criteria): def order_by(self, column, order='asc'): new_q = self._clone() - del new_q.columns[column] + new_q.columns.pop(column,None) if order == 'asc': new_q.columns[column] = query_number['ORDER_BY'] elif order == 'desc': @@ -193,15 +193,20 @@ def all(self): def get_batches(self): result_set = self.execute() - yield result_set - while result_set.continue_index > 0: - try: - result_set = self.continue_index( - result_set.continue_index).execute() - yield result_set - except CAT_NO_ROWS_FOUND: - break + try: + yield result_set + + while result_set.continue_index > 0: + try: + result_set = self.continue_index( + result_set.continue_index).execute() + yield result_set + except CAT_NO_ROWS_FOUND: + break + except GeneratorExit: + if result_set.continue_index > 0: + self.continue_index(result_set.continue_index).close() def get_results(self): for 
result_set in self.get_batches(): @@ -213,6 +218,8 @@ def __iter__(self): def one(self): results = self.execute() + if results.continue_index > 0: + self.continue_index(results.continue_index).close() if not len(results): raise NoResultFound() if len(results) > 1: @@ -222,6 +229,8 @@ def one(self): def first(self): query = self.limit(1) results = query.execute() + if results.continue_index > 0: + query.continue_index(results.continue_index).close() if not len(results): return None else: @@ -288,7 +297,7 @@ def execute(self, limit=MAX_SQL_ROWS, offset=0, options=0, conditions=None): conditions = StringStringMap({}) sql_args = {} - for i, arg in enumerate(self._args[:10]): + for i, arg in enumerate(self._args[:10], start=1): sql_args['arg{}'.format(i)] = arg message_body = SpecificQueryRequest(sql=target, diff --git a/irods/resource.py b/irods/resource.py index 7ddd368..c87a7a7 100644 --- a/irods/resource.py +++ b/irods/resource.py @@ -1,5 +1,6 @@ from __future__ import absolute_import from irods.models import Resource +from irods.meta import iRODSMetaCollection import six @@ -37,6 +38,12 @@ def __init__(self, manager, result=None): self._meta = None + @property + def metadata(self): + if not self._meta: + self._meta = iRODSMetaCollection( + self.manager.sess.metadata, Resource, self.name) + return self._meta @property def context_fields(self): diff --git a/irods/rule.py b/irods/rule.py index c81800c..a36142c 100644 --- a/irods/rule.py +++ b/irods/rule.py @@ -1,39 +1,50 @@ from __future__ import absolute_import -import six from irods.message import iRODSMessage, StringStringMap, RodsHostAddress, STR_PI, MsParam, MsParamArray, RuleExecutionRequest from irods.api_number import api_number +from io import open as io_open +from irods.message import Message, StringProperty -if six.PY3: - from html import escape -else: - from cgi import escape - -import logging - -logger = logging.getLogger(__name__) - +class RemoveRuleMessage(Message): + #define RULE_EXEC_DEL_INP_PI "str 
ruleExecId[NAME_LEN];" + _name = 'RULE_EXEC_DEL_INP_PI' + ruleExecId = StringProperty() + def __init__(self,id_): + super(RemoveRuleMessage,self).__init__() + self.ruleExecId = str(id_) class Rule(object): def __init__(self, session, rule_file=None, body='', params=None, output=''): self.session = session + self.params = {} + self.output = '' + if rule_file: self.load(rule_file) else: - self.body = '@external\n' + escape(body, quote=True) - if params is None: - self.params = {} - else: - self.params = params + self.body = '@external\n' + body + + # overwrite params and output if received arguments + if params is not None: + self.params = params + if output != '': self.output = output - def load(self, rule_file): - self.params = {} - self.output = '' + def remove_by_id(self,*ids): + with self.session.pool.get_connection() as conn: + for id_ in ids: + request = iRODSMessage("RODS_API_REQ", msg=RemoveRuleMessage(id_), + int_info=api_number['RULE_EXEC_DEL_AN']) + conn.send(request) + response = conn.recv() + if response.int_info != 0: + raise RuntimeError("Error removing rule {id_}".format(**locals())) + + def load(self, rule_file, encoding = 'utf-8'): self.body = '@external\n' # parse rule file - with open(rule_file) as f: + with io_open(rule_file, encoding = encoding) as f: for line in f: # parse input line if line.strip().lower().startswith('input'): @@ -61,14 +72,14 @@ def load(self, rule_file): # parse rule else: - self.body += escape(line, quote=True) + self.body += line def execute(self): # rule input param_array = [] for label, value in self.params.items(): - inOutStruct = STR_PI(myStr=escape(value, quote=True)) + inOutStruct = STR_PI(myStr=value) param_array.append(MsParam(label=label, type='STR_PI', inOutStruct=inOutStruct)) inpParamArray = MsParamArray(paramLen=len(param_array), oprType=0, MsParam_PI=param_array) diff --git a/irods/session.py b/irods/session.py index 9be3723..5a65468 100644 --- a/irods/session.py +++ b/irods/session.py @@ -1,6 +1,7 @@ from 
__future__ import absolute_import import os import json +import logging from irods.query import Query from irods.pool import Pool from irods.account import iRODSAccount @@ -10,8 +11,12 @@ from irods.manager.access_manager import AccessManager from irods.manager.user_manager import UserManager, UserGroupManager from irods.manager.resource_manager import ResourceManager +from irods.manager.zone_manager import ZoneManager from irods.exception import NetworkException from irods.password_obfuscation import decode +from irods import NATIVE_AUTH_SCHEME, PAM_AUTH_SCHEME + +logger = logging.getLogger(__name__) class iRODSSession(object): @@ -19,8 +24,10 @@ def __init__(self, configure=True, **kwargs): self.pool = None self.numThreads = 0 + self.do_configure = (kwargs if configure else {}) + self.__configured = None if configure: - self.configure(**kwargs) + self.__configured = self.configure(**kwargs) self.collections = CollectionManager(self) self.data_objects = DataObjectManager(self) @@ -29,6 +36,7 @@ def __init__(self, configure=True, **kwargs): self.users = UserManager(self) self.user_groups = UserGroupManager(self) self.resources = ResourceManager(self) + self.zones = ZoneManager(self) def __enter__(self): return self @@ -36,6 +44,10 @@ def __enter__(self): def __exit__(self, exc_type, exc_value, traceback): self.cleanup() + def __del__(self): + self.do_configure = {} + self.cleanup() + def cleanup(self): for conn in self.pool.active | self.pool.idle: try: @@ -43,8 +55,11 @@ def cleanup(self): except NetworkException: pass conn.release(True) + if self.do_configure: + self.__configured = self.configure(**self.do_configure) def _configure_account(self, **kwargs): + try: env_file = kwargs['irods_env_file'] @@ -73,7 +88,13 @@ def _configure_account(self, **kwargs): # default auth_scheme = 'native' - if auth_scheme != 'native': + if auth_scheme.lower() == PAM_AUTH_SCHEME: + if 'password' in creds: + return iRODSAccount(**creds) + else: + # password will be from irodsA file 
therefore use native login + creds['irods_authentication_scheme'] = NATIVE_AUTH_SCHEME + elif auth_scheme != 'native': return iRODSAccount(**creds) # Native auth, try to unscramble password @@ -86,10 +107,17 @@ def _configure_account(self, **kwargs): return iRODSAccount(**creds) - def configure(self, **kwargs): - account = self._configure_account(**kwargs) - self.pool = Pool(account, kwargs.get ('block_on_authURL', True)) + account = self.__configured + if not account: + account = self._configure_account(**kwargs) + connection_refresh_time = self.get_connection_refresh_time(**kwargs) + self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time, + block_on_authURL=kwargs.get ('block_on_authURL', True)) + logger.debug("In iRODSSession's configure(). connection_refresh_time set to {}".format(connection_refresh_time)) + self.pool = Pool(account, application_name=kwargs.pop('application_name',''), connection_refresh_time=connection_refresh_time, + block_on_authURL=kwargs.get ('block_on_authURL', True)) + return account def query(self, *args): return Query(self, *args) @@ -121,6 +149,15 @@ def server_version(self): conn.release() return version + @property + def pam_pw_negotiated(self): + self.pool.account.store_pw = [] + conn = self.pool.get_connection() + pw = getattr(self.pool.account,'store_pw',[]) + delattr( self.pool.account, 'store_pw') + conn.release() + return pw + @property def default_resource(self): return self.pool.account.default_resource @@ -146,8 +183,12 @@ def get_irods_password_file(): @staticmethod def get_irods_env(env_file): - with open(env_file, 'rt') as f: - return json.load(f) + try: + with open(env_file, 'rt') as f: + return json.load(f) + except IOError: + logger.debug("Could not open file {}".format(env_file)) + return {} @staticmethod def get_irods_password(**kwargs): @@ -163,3 +204,25 @@ def get_irods_password(**kwargs): with open(irods_auth_file, 'r') as f: return 
decode(f.read().rstrip('\n'), uid) + + def get_connection_refresh_time(self, **kwargs): + connection_refresh_time = -1 + + connection_refresh_time = int(kwargs.get('refresh_time', -1)) + if connection_refresh_time != -1: + return connection_refresh_time + + try: + env_file = kwargs['irods_env_file'] + except KeyError: + return connection_refresh_time + + if env_file is not None: + env_file_map = self.get_irods_env(env_file) + connection_refresh_time = int(env_file_map.get('irods_connection_refresh_time', -1)) + if connection_refresh_time < 1: + # Negative values are not allowed. + logger.debug('connection_refresh_time in {} file has value of {}. Only values greater than 1 are allowed.'.format(env_file, connection_refresh_time)) + connection_refresh_time = -1 + + return connection_refresh_time diff --git a/irods/test/access_test.py b/irods/test/access_test.py index 0d1c39f..10f76b4 100644 --- a/irods/test/access_test.py +++ b/irods/test/access_test.py @@ -4,7 +4,10 @@ import sys import unittest from irods.access import iRODSAccess +from irods.user import iRODSUser +from irods.models import User import irods.test.helpers as helpers +from irods.column import In class TestAccess(unittest.TestCase): @@ -22,6 +25,7 @@ def tearDown(self): self.coll.remove(recurse=True, force=True) self.sess.cleanup() + def test_list_acl(self): # test args collection = self.coll_path @@ -114,6 +118,48 @@ def test_set_collection_acl(self): acl1 = iRODSAccess('own', coll.path, user.name, user.zone) self.sess.permissions.set(acl1) + mapping = dict( [ (i,i) for i in ('modify object', 'read object', 'own') ] + + [ ('write','modify object') , ('read', 'read object') ] + ) + + @classmethod + def perms_lists_symm_diff ( cls, a_iter, b_iter ): + fields = lambda perm: (cls.mapping[perm.access_name], perm.user_name, perm.user_zone) + A = set (map(fields,a_iter)) + B = set (map(fields,b_iter)) + return (A-B) | (B-A) + + def test_raw_acls__207(self): + data = 
helpers.make_object(self.sess,"/".join((self.coll_path,"test_obj"))) + eg = eu = fg = fu = None + try: + eg = self.sess.user_groups.create ('egrp') + eu = self.sess.users.create ('edith','rodsuser') + eg.addmember(eu.name,eu.zone) + fg = self.sess.user_groups.create ('fgrp') + fu = self.sess.users.create ('frank','rodsuser') + fg.addmember(fu.name,fu.zone) + my_ownership = set([('own', self.sess.username, self.sess.zone)]) + #--collection-- + perms1data = [ iRODSAccess ('write',self.coll_path, eg.name, self.sess.zone), + iRODSAccess ('read', self.coll_path, fu.name, self.sess.zone) + ] + for perm in perms1data: self.sess.permissions.set ( perm ) + p1 = self.sess.permissions.get ( self.coll, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms1data, p1 ), my_ownership) + #--data object-- + perms2data = [ iRODSAccess ('write',data.path, fg.name, self.sess.zone), + iRODSAccess ('read', data.path, eu.name, self.sess.zone) + ] + for perm in perms2data: self.sess.permissions.set ( perm ) + p2 = self.sess.permissions.get ( data, report_raw_acls = True) + self.assertEqual(self.perms_lists_symm_diff( perms2data, p2 ), my_ownership) + finally: + ids_for_delete = [ u.id for u in (fu,fg,eu,eg) if u is not None ] + for u in [ iRODSUser(self.sess.users,row) + for row in self.sess.query(User).filter(In(User.id, ids_for_delete)) ]: + u.remove() + if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/test/admin_test.py b/irods/test/admin_test.py index 883acec..6f57508 100644 --- a/irods/test/admin_test.py +++ b/irods/test/admin_test.py @@ -2,14 +2,13 @@ from __future__ import absolute_import import os import sys -import string -import random import unittest from irods.models import User from irods.exception import UserDoesNotExist, ResourceDoesNotExist from irods.session import iRODSSession from irods.resource import iRODSResource import irods.test.helpers as helpers +import irods.keywords as kw class 
TestAdmin(unittest.TestCase): @@ -155,30 +154,33 @@ def test_make_compound_resource(self): session.resources.add_child(comp.name, ufs1.name, 'archive') session.resources.add_child(comp.name, ufs2.name, 'cache') - # create object on compound resource - obj = session.data_objects.create(obj_path, comp.name) + obj = None - # write to object - with obj.open('w+') as obj_desc: - obj_desc.write(dummy_str) + try: + # create object on compound resource + obj = session.data_objects.create(obj_path, resource = comp.name) - # refresh object - obj = session.data_objects.get(obj_path) + # write to object + with obj.open('w+',**{kw.DEST_RESC_NAME_KW:comp.name}) as obj_desc: + obj_desc.write(dummy_str) - # check that we have 2 replicas - self.assertEqual(len(obj.replicas), 2) + # refresh object + obj = session.data_objects.get(obj_path) - # remove object - obj.unlink(force=True) + # check that we have 2 replicas + self.assertEqual(len(obj.replicas), 2) + finally: + # remove object + if obj: obj.unlink(force=True) - # remove children from compound resource - session.resources.remove_child(comp.name, ufs1.name) - session.resources.remove_child(comp.name, ufs2.name) + # remove children from compound resource + session.resources.remove_child(comp.name, ufs1.name) + session.resources.remove_child(comp.name, ufs2.name) - # remove resources - ufs1.remove() - ufs2.remove() - comp.remove() + # remove resources + ufs1.remove() + ufs2.remove() + comp.remove() def test_get_resource_children(self): @@ -265,6 +267,9 @@ def test_resource_context_string(self): def test_make_ufs_resource(self): + RESC_PATH_BASE = helpers.irods_shared_tmp_dir() + if not(RESC_PATH_BASE) and not helpers.irods_session_host_local (self.sess): + self.skipTest('for non-local server with shared tmp dir missing') # test data resc_name = 'temporary_test_resource' if self.sess.server_version < (4, 0, 0): @@ -306,7 +311,9 @@ def test_make_ufs_resource(self): obj = self.sess.data_objects.create(obj_path, resc_name) # write 
something to the file - with obj.open('w+') as obj_desc: + # (can omit use of DEST_RESC_NAME_KW on resolution of + # https://github.com/irods/irods/issues/5548 ) + with obj.open('w+', **{kw.DEST_RESC_NAME_KW: resc_name} ) as obj_desc: obj_desc.write(dummy_str) # refresh object (size has changed) @@ -332,8 +339,8 @@ def test_set_user_password(self): zone = self.sess.zone self.sess.users.create(self.new_user_name, self.new_user_type) - # make a 12 character pseudo-random password - new_password = ''.join(random.choice(string.ascii_letters + string.digits + string.punctuation) for _ in range(12)) + # make a really horrible password + new_password = '''abc123!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~Z''' self.sess.users.modify(username, 'password', new_password) # open a session as the new user @@ -354,6 +361,46 @@ def test_set_user_password(self): self.sess.users.get(self.new_user_name) + def test_set_user_comment(self): + # make a new user + self.sess.users.create(self.new_user_name, self.new_user_type) + + # modify user comment + new_comment = '''comment-abc123!"#$%&'()*+,-./:;<=>?@[\]^_{|}~Z''' # omitting backtick due to #170 + self.sess.users.modify(self.new_user_name, 'comment', new_comment) + + # check comment was modified + new_user = self.sess.users.get(self.new_user_name) + self.assertEqual(new_user.comment, new_comment) + + # delete new user + self.sess.users.remove(self.new_user_name) + + # user should be gone + with self.assertRaises(UserDoesNotExist): + self.sess.users.get(self.new_user_name) + + + def test_set_user_info(self): + # make a new user + self.sess.users.create(self.new_user_name, self.new_user_type) + + # modify user info + new_info = '''info-abc123!"#$%&'()*+,-./:;<=>?@[\]^_{|}~Z''' # omitting backtick due to #170 + self.sess.users.modify(self.new_user_name, 'info', new_info) + + # check info was modified + new_user = self.sess.users.get(self.new_user_name) + self.assertEqual(new_user.info, new_info) + + # delete new user + 
self.sess.users.remove(self.new_user_name) + + # user should be gone + with self.assertRaises(UserDoesNotExist): + self.sess.users.get(self.new_user_name) + + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/collection_test.py b/irods/test/collection_test.py index fc811dd..d0f0030 100644 --- a/irods/test/collection_test.py +++ b/irods/test/collection_test.py @@ -241,13 +241,15 @@ def test_collection_metadata(self): def test_register_collection(self): - if self.sess.host not in ('localhost', socket.gethostname()): + tmp_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(tmp_dir) and not(loc_server): self.skipTest('Requires access to server-side file(s)') # test vars file_count = 10 dir_name = 'register_test_dir' - dir_path = os.path.join('/tmp', dir_name) + dir_path = os.path.join((tmp_dir or '/tmp'), dir_name) coll_path = '{}/{}'.format(self.test_coll.path, dir_name) # make test dir @@ -272,13 +274,15 @@ def test_register_collection(self): def test_register_collection_with_checksums(self): - if self.sess.host not in ('localhost', socket.gethostname()): + tmp_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(tmp_dir) and not(loc_server): self.skipTest('Requires access to server-side file(s)') # test vars file_count = 10 - dir_name = 'register_test_dir' - dir_path = os.path.join('/tmp', dir_name) + dir_name = 'register_test_dir_with_chksums' + dir_path = os.path.join((tmp_dir or '/tmp'), dir_name) coll_path = '{}/{}'.format(self.test_coll.path, dir_name) # make test dir diff --git a/irods/test/data_obj_test.py b/irods/test/data_obj_test.py index 424338e..ef03582 100644 --- a/irods/test/data_obj_test.py +++ b/irods/test/data_obj_test.py @@ -9,24 +9,48 @@ import random import string import unittest +import contextlib # check if redundant +import logging 
+import io +import re + from irods.models import Collection, DataObject -from irods.session import iRODSSession import irods.exception as ex from irods.column import Criterion from irods.data_object import chunks import irods.test.helpers as helpers import irods.keywords as kw +from irods.manager import data_object_manager from datetime import datetime +from tempfile import NamedTemporaryFile +from irods.test.helpers import (unique_name, my_function_name) +import irods.parallel + + +def make_ufs_resc_in_tmpdir(session, base_name, allow_local = False): + tmpdir = helpers.irods_shared_tmp_dir() + if not tmpdir and allow_local: + tmpdir = os.getenv('TMPDIR') or '/tmp' + if not tmpdir: + raise RuntimeError("Must have filesystem path shareable with server.") + full_phys_dir = os.path.join(tmpdir,base_name) + if not os.path.exists(full_phys_dir): os.mkdir(full_phys_dir) + session.resources.create(base_name,'unixfilesystem',session.host,full_phys_dir) + return full_phys_dir + class TestDataObjOps(unittest.TestCase): - def setUp(self): - self.sess = helpers.make_session() + from irods.test.helpers import (create_simple_resc) + + def setUp(self): # Create test collection + self.sess = helpers.make_session() self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) self.coll = helpers.make_collection(self.sess, self.coll_path) - + with self.sess.pool.get_connection() as conn: + self.SERVER_VERSION = conn.server_version def tearDown(self): '''Remove test data and close connections @@ -34,6 +58,103 @@ def tearDown(self): self.coll.remove(recurse=True, force=True) self.sess.cleanup() + @staticmethod + def In_Memory_Stream(): + return io.BytesIO() if sys.version_info < (3,) else io.StringIO() + + + @contextlib.contextmanager + def create_resc_hierarchy (self, Root, Leaf = None): + if not Leaf: + Leaf = 'simple_leaf_resc_' + unique_name (my_function_name(), datetime.now()) + y_value = (Root,Leaf) + else: + y_value = ';'.join([Root,Leaf]) + 
self.sess.resources.create(Leaf,'unixfilesystem', + host = self.sess.host, + path='/tmp/' + Leaf) + self.sess.resources.create(Root,'passthru') + self.sess.resources.add_child(Root,Leaf) + try: + yield y_value + finally: + self.sess.resources.remove_child(Root,Leaf) + self.sess.resources.remove(Leaf) + self.sess.resources.remove(Root) + + def test_put_get_parallel_autoswitch_A__235(self): + if not self.sess.data_objects.should_parallelize_transfer(server_version_hint = self.SERVER_VERSION): + self.skipTest('Skip unless detected server version is 4.2.9') + if getattr(data_object_manager,'DEFAULT_NUMBER_OF_THREADS',None) in (1, None): + self.skipTest('Data object manager not configured for parallel puts and gets') + Root = 'pt235' + Leaf = 'resc235' + files_to_delete = [] + # This test does the following: + # - set up a small resource hierarchy and generate a file large enough to trigger parallel transfer + # - `put' the file to iRODS, then `get' it back, comparing the resulting two disk files and making + # sure that the parallel routines were invoked to do both transfers + + with self.create_resc_hierarchy(Root) as (Root_ , Leaf): + self.assertEqual(Root , Root_) + self.assertIsInstance( Leaf, str) + datafile = NamedTemporaryFile (prefix='getfromhier_235_',delete=True) + datafile.write( os.urandom( data_object_manager.MAXIMUM_SINGLE_THREADED_TRANSFER_SIZE + 1 )) + datafile.flush() + base_name = os.path.basename(datafile.name) + data_obj_name = '/{0.zone}/home/{0.username}/{1}'.format(self.sess, base_name) + options = { kw.DEST_RESC_NAME_KW:Root, + kw.RESC_NAME_KW:Root } + + PUT_LOG = self.In_Memory_Stream() + GET_LOG = self.In_Memory_Stream() + NumThreadsRegex = re.compile('^num_threads\s*=\s*(\d+)',re.MULTILINE) + + try: + with irods.parallel.enableLogging( logging.StreamHandler, (PUT_LOG,), level_=logging.INFO): + self.sess.data_objects.put(datafile.name, data_obj_name, num_threads = 0, **options) # - PUT + match = NumThreadsRegex.search (PUT_LOG.getvalue()) + 
self.assertTrue (match is not None and int(match.group(1)) >= 1) # - PARALLEL code path taken? + + with irods.parallel.enableLogging( logging.StreamHandler, (GET_LOG,), level_=logging.INFO): + self.sess.data_objects.get(data_obj_name, datafile.name+".get", num_threads = 0, **options) # - GET + match = NumThreadsRegex.search (GET_LOG.getvalue()) + self.assertTrue (match is not None and int(match.group(1)) >= 1) # - PARALLEL code path taken? + + files_to_delete += [datafile.name + ".get"] + + with open(datafile.name, "rb") as f1, open(datafile.name + ".get", "rb") as f2: + self.assertEqual ( f1.read(), f2.read() ) + + q = self.sess.query (DataObject.name,DataObject.resc_hier).filter( DataObject.name == base_name, + DataObject.resource_name == Leaf) + replicas = list(q) + self.assertEqual( len(replicas), 1 ) + self.assertEqual( replicas[0][DataObject.resc_hier] , ';'.join([Root,Leaf]) ) + + finally: + self.sess.data_objects.unlink( data_obj_name, force = True) + for n in files_to_delete: os.unlink(n) + + def test_open_existing_dataobj_in_resource_hierarchy__232(self): + Root = 'pt1' + Leaf = 'resc1' + with self.create_resc_hierarchy(Root,Leaf) as hier_str: + obj = None + try: + datafile = NamedTemporaryFile (prefix='getfromhier_232_',delete=True) + datafile.write(b'abc\n') + datafile.flush() + fname = datafile.name + bname = os.path.basename(fname) + LOGICAL = self.coll_path + '/' + bname + self.sess.data_objects.put(fname,LOGICAL, **{kw.DEST_RESC_NAME_KW:Root}) + self.assertEqual([bname], [res[DataObject.name] for res in + self.sess.query(DataObject.name).filter(DataObject.resc_hier == hier_str)]) + obj = self.sess.data_objects.get(LOGICAL) + obj.open('a') # prior to #232 fix, raises DIRECT_CHILD_ACCESS + finally: + if obj: obj.unlink(force=True) def make_new_server_config_json(self, server_config_filename): # load server_config.json to inject a new rule base @@ -56,6 +177,39 @@ def sha256_checksum(self, filename, block_size=65536): return sha256.hexdigest() + def 
test_compute_chksum( self ): + + with self.create_simple_resc() as R, NamedTemporaryFile(mode = 'wb') as f: + coll_path = '/{0.zone}/home/{0.username}' .format(self.sess) + dobj_path = coll_path + '/' + os.path.basename(f.name) + Data = self.sess.data_objects + try: + f.write(b'some content bytes ...\n') + f.flush() + Data.put( f.name, dobj_path ) + + # get original checksum and resource name + my_object = Data.get(dobj_path) + orig_resc = my_object.replicas[0].resource_name + chk1 = my_object.chksum() + + # repl to new resource and iput to that new replica + Data.replicate( dobj_path, resource = R) + f.write(b'...added bytes\n') + f.flush() + Data.put( f.name, dobj_path, **{kw.DEST_RESC_NAME_KW: R, + kw.FORCE_FLAG_KW: '1'}) + # compare checksums + my_object = Data.get(dobj_path) + chk2 = my_object.chksum( **{kw.RESC_NAME_KW : R} ) + chk1b = my_object.chksum( **{kw.RESC_NAME_KW : orig_resc} ) + self.assertEqual (chk1, chk1b) + self.assertNotEqual (chk1, chk2) + + finally: + if Data.exists (dobj_path): Data.unlink (dobj_path, force = True) + + def test_obj_exists(self): obj_name = 'this_object_will_exist_once_made' exists_path = '{}/{}'.format(self.coll_path, obj_name) @@ -70,6 +224,20 @@ def test_obj_does_not_exist(self): self.assertFalse(self.sess.data_objects.exists(does_not_exist_path)) + def test_create_from_invalid_path__250(self): + possible_exceptions = { ex.CAT_UNKNOWN_COLLECTION: (lambda serv_vsn : serv_vsn >= (4,2,9)), + ex.SYS_INVALID_INPUT_PARAM: (lambda serv_vsn : serv_vsn <= (4,2,8)) + } + raisedExc = None + try: + self.sess.data_objects.create('t') + except Exception as exc: + raisedExc = exc + server_version_cond = possible_exceptions.get(type(raisedExc)) + self.assertTrue(server_version_cond is not None) + self.assertTrue(server_version_cond(self.sess.server_version)) + + def test_rename_obj(self): # test args collection = self.coll_path @@ -99,7 +267,7 @@ def test_rename_obj(self): self.assertEqual(obj.id, saved_id) # remove object - 
self.sess.data_objects.unlink(new_path) + self.sess.data_objects.unlink(new_path, force = True) def test_move_obj_to_coll(self): @@ -131,6 +299,37 @@ def test_move_obj_to_coll(self): # remove new collection new_coll.remove(recurse=True, force=True) + def test_copy_existing_obj_to_relative_dest_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will give nondescriptive error') + obj_name = 'this_object_will_exist_once_made' + exists_path = '{}/{}'.format(self.coll_path, obj_name) + helpers.make_object(self.sess, exists_path) + self.assertTrue(self.sess.data_objects.exists(exists_path)) + non_existing_zone = 'this_zone_absent' + relative_dst_path = '{non_existing_zone}/{obj_name}'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(exists_path, relative_dst_path, **options) + + def test_copy_from_nonexistent_absolute_data_obj_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + non_existing_zone = 'this_zone_absent' + src_path = '/{non_existing_zone}/non_existing.src'.format(**locals()) + dst_path = '/{non_existing_zone}/non_existing.dst'.format(**locals()) + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) + + def test_copy_from_relative_path_fails_irods4796(self): + if self.sess.server_version <= (4, 2, 7): + self.skipTest('iRODS servers <= 4.2.7 will hang the client') + src_path = 'non_existing.src' + dst_path = 'non_existing.dst' + options = {} + with self.assertRaises(ex.USER_INPUT_PATH_ERR): + self.sess.data_objects.copy(src_path, dst_path, **options) def test_copy_obj_to_obj(self): # test args @@ -292,7 +491,7 @@ def test_create_with_checksum(self): obj_path = "{collection}/{filename}".format(**locals()) contents = 'blah' * 100 checksum = base64.b64encode( - 
hashlib.sha256(contents).digest()).decode() + hashlib.sha256(contents.encode()).digest()).decode() # make object in test collection options = {kw.OPR_TYPE_KW: 1} # PUT_OPR @@ -353,7 +552,8 @@ def test_put_file_trigger_pep(self): # make pseudo-random test file filename = 'test_put_file_trigger_pep.txt' test_file = os.path.join('/tmp', filename) - contents = ''.join(random.choice(string.printable) for _ in range(1024)) + contents = ''.join(random.choice(string.printable) for _ in range(1024)).encode() + contents = contents[:1024] with open(test_file, 'wb') as f: f.write(contents) @@ -482,7 +682,7 @@ def test_replica_number(self): # make ufs resources ufs_resources = [] for i in range(number_of_replicas): - resource_name = 'ufs{}'.format(i) + resource_name = unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = session.host resource_path = '/tmp/' + resource_name @@ -568,7 +768,7 @@ def test_repave_replicas(self): # make ufs resources and replicate object ufs_resources = [] for i in range(number_of_replicas): - resource_name = 'ufs{}'.format(i) + resource_name = unique_name(my_function_name(),i) resource_type = 'unixfilesystem' resource_host = session.host resource_path = '/tmp/{}'.format(resource_name) @@ -607,6 +807,67 @@ def test_repave_replicas(self): resource.remove() + def test_get_replica_size(self): + session = self.sess + + # Can't do one step open/create with older servers + if session.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_replica_size_test_file' + test_file = os.path.join(test_dir, filename) + collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = unique_name(my_function_name(),i) + resource_type = 'unixfilesystem' + resource_host = session.host + 
resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(session.resources.create( + resource_name, resource_type, resource_host, resource_path)) + + # put file in test collection and replicate + obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + session.data_objects.replicate(obj_path, ufs_resources[1].name) + + # make random 32byte binary file + new_size = 32 + with open(test_file, 'wb') as f: + f.write(os.urandom(new_size)) + + # overwrite existing replica 0 with new file + options = {kw.FORCE_FLAG_KW: '', kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + session.data_objects.put(test_file, collection + '/', **options) + + # delete file + os.remove(test_file) + + # ensure that sizes of the replicas are distinct + obj = session.data_objects.get(obj_path, test_dir) + self.assertEqual(obj.replicas[0].size, new_size) + self.assertEqual(obj.replicas[1].size, original_size) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove ufs resources + for resource in ufs_resources: + resource.remove() + + def test_obj_put_get(self): # Can't do one step open/create with older servers if self.sess.server_version <= (4, 1, 4): @@ -766,6 +1027,46 @@ def test_obj_put_to_default_resource_from_env_file(self): os.remove(new_env_file) + def test_obj_put_and_return_data_object(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # make another UFS resource + session = self.sess + resource_name = 'ufs' + resource_type = 'unixfilesystem' + resource_host = session.host + resource_path = '/tmp/' + resource_name + session.resources.create(resource_name, resource_type, resource_host, resource_path) + + # set default resource to new UFS resource + session.default_resource = resource_name + + # make a local file 
with random text content + content = ''.join(random.choice(string.printable) for _ in range(1024)) + filename = 'testfile.txt' + file_path = os.path.join('/tmp', filename) + with open(file_path, 'w') as f: + f.write(content) + + # put file + collection = self.coll_path + obj_path = '{collection}/{filename}'.format(**locals()) + + new_file = session.data_objects.put(file_path, obj_path, return_data_object=True) + + # get object and confirm resource + obj = session.data_objects.get(obj_path) + self.assertEqual(new_file.replicas[0].resource_name, obj.replicas[0].resource_name) + + # cleanup + os.remove(file_path) + obj.unlink(force=True) + session.resources.remove(resource_name) + + + def test_force_get(self): # Can't do one step open/create with older servers if self.sess.server_version <= (4, 1, 4): @@ -797,17 +1098,132 @@ def test_force_get(self): os.remove(test_file) - def test_register(self): + def test_modDataObjMeta(self): + test_dir = helpers.irods_shared_tmp_dir() # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not (loc_server): self.skipTest('Requires access to server-side file(s)') # test vars - test_dir = '/tmp' + resc_name = 'testDataObjMetaResc' filename = 'register_test_file' + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) + + # make random 4K binary file + with open(test_file, 'wb') as f: + f.write(os.urandom(1024 * 4)) + + # register file in test collection + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW:resc_name}) + + qu = self.sess.query(Collection.id).filter(Collection.name == collection) + for res in qu: + collection_id = res[Collection.id] + + qu = self.sess.query(DataObject.size, 
DataObject.modify_time).filter(DataObject.name == filename, DataObject.collection_id == collection_id) + for res in qu: + self.assertEqual(int(res[DataObject.size]), 1024 * 4) + self.sess.data_objects.modDataObjMeta({"objPath" : obj_path}, {"dataSize":1024, "dataModify":4096}) + + qu = self.sess.query(DataObject.size, DataObject.modify_time).filter(DataObject.name == filename, DataObject.collection_id == collection_id) + for res in qu: + self.assertEqual(int(res[DataObject.size]), 1024) + self.assertEqual(res[DataObject.modify_time], datetime.utcfromtimestamp(4096)) + + # leave physical file on disk + self.sess.data_objects.unregister(obj_path) + + # delete file + os.remove(test_file) + + + def test_get_data_objects(self): + # Can't do one step open/create with older servers + if self.sess.server_version <= (4, 1, 4): + self.skipTest('For iRODS 4.1.5 and newer') + + # test vars + test_dir = '/tmp' + filename = 'get_data_objects_test_file' test_file = os.path.join(test_dir, filename) collection = self.coll.path + + # make random 16byte binary file + original_size = 16 + with open(test_file, 'wb') as f: + f.write(os.urandom(original_size)) + + # make ufs resources + ufs_resources = [] + for i in range(2): + resource_name = unique_name(my_function_name(),i) + resource_type = 'unixfilesystem' + resource_host = self.sess.host + resource_path = '/tmp/{}'.format(resource_name) + ufs_resources.append(self.sess.resources.create( + resource_name, resource_type, resource_host, resource_path)) + + + # make passthru resource and add ufs1 as a child + passthru_resource = self.sess.resources.create('pt', 'passthru') + self.sess.resources.add_child(passthru_resource.name, ufs_resources[1].name) + + # put file in test collection and replicate obj_path = '{collection}/{filename}'.format(**locals()) + options = {kw.DEST_RESC_NAME_KW: ufs_resources[0].name} + self.sess.data_objects.put(test_file, '{collection}/'.format(**locals()), **options) + 
self.sess.data_objects.replicate(obj_path, passthru_resource.name) + + # ensure that replica info is populated + obj = self.sess.data_objects.get(obj_path) + for i in ["number","status","resource_name","path","resc_hier"]: + self.assertIsNotNone(obj.replicas[0].__getattribute__(i)) + self.assertIsNotNone(obj.replicas[1].__getattribute__(i)) + + # ensure replica info is sensible + for i in range(2): + self.assertEqual(obj.replicas[i].number, i) + self.assertEqual(obj.replicas[i].status, '1') + self.assertEqual(obj.replicas[i].path.split('/')[-1], filename) + self.assertEqual(obj.replicas[i].resc_hier.split(';')[-1], ufs_resources[i].name) + + self.assertEqual(obj.replicas[0].resource_name, ufs_resources[0].name) + if self.sess.server_version < (4, 2, 0): + self.assertEqual(obj.replicas[i].resource_name, passthru_resource.name) + else: + self.assertEqual(obj.replicas[i].resource_name, ufs_resources[1].name) + self.assertEqual(obj.replicas[1].resc_hier.split(';')[0], passthru_resource.name) + + # remove object + obj.unlink(force=True) + # delete file + os.remove(test_file) + + # remove resources + self.sess.resources.remove_child(passthru_resource.name, ufs_resources[1].name) + passthru_resource.remove() + for resource in ufs_resources: + resource.remove() + + + def test_register(self): + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') + + # test vars + resc_name = "testRegisterOpResc" + filename = 'register_test_file' + collection = self.coll.path + obj_path = '{collection}/{filename}'.format(**locals()) + + test_path = make_ufs_resc_in_tmpdir(self.sess,resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) # make random 4K binary file with open(test_file, 'wb') as f: @@ -828,23 +1244,26 @@ def test_register(self): def 
test_register_with_checksum(self): - # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): - self.skipTest('Requires access to server-side file(s)') + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') # test vars - test_dir = '/tmp' + resc_name= 'regWithChksumResc' filename = 'register_test_file' - test_file = os.path.join(test_dir, filename) collection = self.coll.path obj_path = '{collection}/{filename}'.format(**locals()) + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, allow_local = loc_server) + test_file = os.path.join(test_path, filename) + # make random 4K binary file with open(test_file, 'wb') as f: f.write(os.urandom(1024 * 4)) # register file in test collection - options = {kw.VERIFY_CHKSUM_KW: ''} + options = {kw.VERIFY_CHKSUM_KW: '', kw.RESC_NAME_KW: resc_name} self.sess.data_objects.register(test_file, obj_path, **options) # confirm object presence and verify checksum @@ -861,16 +1280,18 @@ def test_register_with_checksum(self): # delete file os.remove(test_file) - def test_modDataObjMeta(self): - # skip if server is remote - if self.sess.host not in ('localhost', socket.gethostname()): - self.skipTest('Requires access to server-side file(s)') + def test_register_with_xml_special_chars(self): + test_dir = helpers.irods_shared_tmp_dir() + loc_server = self.sess.host in ('localhost', socket.gethostname()) + if not(test_dir) and not(loc_server): + self.skipTest('data_obj register requires server has access to local or shared files') # test vars - test_dir = '/tmp' - filename = 'register_test_file' - test_file = os.path.join(test_dir, filename) + resc_name = 'regWithXmlSpecialCharsResc' collection = self.coll.path + filename = '''aaa'"<&test&>"'_file''' + test_path = make_ufs_resc_in_tmpdir(self.sess, resc_name, 
allow_local = loc_server) + test_file = os.path.join(test_path, filename) obj_path = '{collection}/{filename}'.format(**locals()) # make random 4K binary file @@ -878,28 +1299,19 @@ def test_modDataObjMeta(self): f.write(os.urandom(1024 * 4)) # register file in test collection - self.sess.data_objects.register(test_file, obj_path) - - qu = self.sess.query(Collection.id).filter(Collection.name == collection) - for res in qu: - collection_id = res[Collection.id] - - qu = self.sess.query(DataObject.size, DataObject.modify_time).filter(DataObject.name == filename, DataObject.collection_id == collection_id) - for res in qu: - self.assertEqual(int(res[DataObject.size]), 1024 * 4) - self.sess.data_objects.modDataObjMeta({"objPath" : obj_path}, {"dataSize":1024, "dataModify":4096}) + self.sess.data_objects.register(test_file, obj_path, **{kw.RESC_NAME_KW: resc_name}) - qu = self.sess.query(DataObject.size, DataObject.modify_time).filter(DataObject.name == filename, DataObject.collection_id == collection_id) - for res in qu: - self.assertEqual(int(res[DataObject.size]), 1024) - self.assertEqual(res[DataObject.modify_time], datetime.utcfromtimestamp(4096)) + # confirm object presence + obj = self.sess.data_objects.get(obj_path) - # leave physical file on disk - self.sess.data_objects.unregister(obj_path) + # in a real use case we would likely + # want to leave the physical file on disk + obj.unregister() # delete file os.remove(test_file) + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/extended_test.py b/irods/test/extended_test.py index 9a81dd4..884a4f1 100644 --- a/irods/test/extended_test.py +++ b/irods/test/extended_test.py @@ -1,4 +1,5 @@ #! 
/usr/bin/env python +from __future__ import print_function from __future__ import absolute_import import os import sys @@ -9,21 +10,32 @@ class TestContinueQuery(unittest.TestCase): + @classmethod + def setUpClass(cls): + # once only (before all tests), set up large collection + print ("Creating a large collection...", file = sys.stderr) + with helpers.make_session() as sess: + # Create test collection + cls.coll_path = '/{}/home/{}/test_dir'.format(sess.zone, sess.username) + cls.obj_count = 2500 + cls.coll = helpers.make_test_collection( sess, cls.coll_path, cls.obj_count) + def setUp(self): + # open the session (per-test) self.sess = helpers.make_session() - # Create test collection - self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) - self.obj_count = 2500 - self.coll = helpers.make_test_collection( - self.sess, self.coll_path, self.obj_count) - def tearDown(self): - '''Remove test data and close connections - ''' - self.coll.remove(recurse=True, force=True) + # close the session (per-test) self.sess.cleanup() + @classmethod + def tearDownClass(cls): + """Remove test data.""" + # once only (after all tests), delete large collection + print ("Deleting the large collection...", file = sys.stderr) + with helpers.make_session() as sess: + sess.collections.remove(cls.coll_path, recurse=True, force=True) + def test_walk_large_collection(self): for current_coll, subcolls, objects in self.coll.walk(): # check number of objects diff --git a/irods/test/force_create.py b/irods/test/force_create.py new file mode 100644 index 0000000..5fd0a85 --- /dev/null +++ b/irods/test/force_create.py @@ -0,0 +1,52 @@ +#! 
/usr/bin/env python +from __future__ import absolute_import +import os +import sys +import unittest + +from irods.exception import OVERWRITE_WITHOUT_FORCE_FLAG +import irods.test.helpers as helpers + +class TestForceCreate(unittest.TestCase): + + def setUp(self): + self.sess = helpers.make_session() + + def tearDown(self): + """Close connections.""" + self.sess.cleanup() + + # This test should pass whether or not federation is configured: + def test_force_create(self): + if self.sess.server_version > (4, 2, 8): + self.skipTest('force flag unneeded for create in iRODS > 4.2.8') + session = self.sess + FILE = '/{session.zone}/home/{session.username}/a.txt'.format(**locals()) + try: + session.data_objects.unlink(FILE) + except: + pass + error = None + try: + session.data_objects.create(FILE) + session.data_objects.create(FILE) + except OVERWRITE_WITHOUT_FORCE_FLAG: + error = "OVERWRITE_WITHOUT_FORCE_FLAG" + self.assertEqual (error, "OVERWRITE_WITHOUT_FORCE_FLAG") + error = None + try: + session.data_objects.create(FILE, force=True) + except: + error = "Error creating with force" + self.assertEqual (error, None) + try: + session.data_objects.unlink(FILE) + except: + error = "Error cleaning up" + self.assertEqual (error, None) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() diff --git a/irods/test/helpers.py b/irods/test/helpers.py index 76d9204..5f18088 100644 --- a/irods/test/helpers.py +++ b/irods/test/helpers.py @@ -7,12 +7,71 @@ import hashlib import base64 import math +import socket +import inspect +import threading +import random +import datetime from pwd import getpwnam from irods.session import iRODSSession from irods.message import iRODSMessage from six.moves import range +def my_function_name(): + """Returns the name of the calling function or method""" + return inspect.getframeinfo(inspect.currentframe().f_back).function + +_thrlocal = threading.local() + +def 
unique_name(*seed_tuple): + '''For deterministic pseudo-random identifiers based on function/method name + to prevent e.g. ICAT collisions within and between tests. Example use: + + def f(session): + seq_num = 1 + a_name = unique_name( my_function_name(), seq_num # [, *optional_further_args] + ) + seq_num += 1 + session.resources.create( a_name, 'unixfilesystem', session.host, '/tmp/' + a_name ) + ''' + if not getattr(_thrlocal,"rand_gen",None) : _thrlocal.rand_gen = random.Random() + _thrlocal.rand_gen.seed(seed_tuple) + return '%016X' % _thrlocal.rand_gen.randint(0,(1<<64)-1) + + +IRODS_SHARED_DIR = os.path.join( os.path.sep, 'irods_shared' ) +IRODS_SHARED_TMP_DIR = os.path.join(IRODS_SHARED_DIR,'tmp') +IRODS_SHARED_REG_RESC_VAULT = os.path.join(IRODS_SHARED_DIR,'reg_resc') + +IRODS_REG_RESC = 'MyRegResc' + +def irods_shared_tmp_dir(): + pth = IRODS_SHARED_TMP_DIR + can_write = False + if os.path.exists(pth): + try: tempfile.NamedTemporaryFile(dir = pth) + except: pass + else: can_write = True + return pth if can_write else '' + +def irods_shared_reg_resc_vault() : + vault = IRODS_SHARED_REG_RESC_VAULT + if os.path.exists(vault): + return vault + else: + return None + +def get_register_resource(session): + vault_path = irods_shared_reg_resc_vault() + Reg_Resc_Name = '' + if vault_path: + session.resources.create(IRODS_REG_RESC, 'unixfilesystem', session.host, vault_path) + Reg_Resc_Name = IRODS_REG_RESC + return Reg_Resc_Name + + + def make_session(**kwargs): try: env_file = kwargs['irods_env_file'] @@ -37,10 +96,14 @@ def make_object(session, path, content=None, **options): content = iRODSMessage.encode_unicode(content) - # 2 step open-create necessary for iRODS 4.1.4 or older - obj = session.data_objects.create(path) - with obj.open('w', **options) as obj_desc: - obj_desc.write(content) + if session.server_version <= (4,1,4): + # 2 step open-create necessary for iRODS 4.1.4 or older + obj = session.data_objects.create(path) + with obj.open('w', **options) as 
obj_desc: + obj_desc.write(content) + else: + with session.data_objects.open(path, 'w', **options) as obj_desc: + obj_desc.write(content) # refresh object after write return session.data_objects.get(path) @@ -109,6 +172,38 @@ def make_flat_test_dir(dir_path, file_count=10, file_size=1024): with open(file_path, 'wb') as f: f.write(os.urandom(file_size)) +@contextlib.contextmanager +def create_simple_resc (self, rescName = None): + if not rescName: + rescName = 'simple_resc_' + unique_name (my_function_name() + '_simple_resc', datetime.datetime.now()) + created = False + try: + self.sess.resources.create(rescName, + 'unixfilesystem', + host = self.sess.host, + path = '/tmp/' + rescName) + created = True + yield rescName + finally: + if created: + self.sess.resources.remove(rescName) + +@contextlib.contextmanager +def create_simple_resc_hierarchy (self, Root, Leaf): + d = tempfile.mkdtemp() + self.sess.resources.create(Leaf,'unixfilesystem', + host = self.sess.host, + path=d) + self.sess.resources.create(Root,'passthru') + self.sess.resources.add_child(Root,Leaf) + try: + yield ';'.join([Root,Leaf]) + finally: + self.sess.resources.remove_child(Root,Leaf) + self.sess.resources.remove(Leaf) + self.sess.resources.remove(Root) + shutil.rmtree(d) + def chunks(f, chunksize=io.DEFAULT_BUFFER_SIZE): return iter(lambda: f.read(chunksize), b'') @@ -124,6 +219,17 @@ def compute_sha256_digest(file_path): return base64.b64encode(hasher.digest()).decode() +def remove_unused_metadata(session): + from irods.message import GeneralAdminRequest + from irods.api_number import api_number + message_body = GeneralAdminRequest( 'rm', 'unusedAVUs', '','','','') + req = iRODSMessage("RODS_API_REQ", msg = message_body,int_info=api_number['GENERAL_ADMIN_AN']) + with session.pool.get_connection() as conn: + conn.send(req) + response=conn.recv() + if (response.int_info != 0): raise RuntimeError("Error removing unused AVUs") + + @contextlib.contextmanager def file_backed_up(filename): with 
tempfile.NamedTemporaryFile(prefix=os.path.basename(filename)) as f: @@ -132,3 +238,8 @@ def file_backed_up(filename): yield filename finally: shutil.copyfile(f.name, filename) + + +def irods_session_host_local (sess): + return socket.gethostbyname(sess.host) == \ + socket.gethostbyname(socket.gethostname()) diff --git a/irods/test/login_auth_test.py b/irods/test/login_auth_test.py new file mode 100644 index 0000000..b92e8d4 --- /dev/null +++ b/irods/test/login_auth_test.py @@ -0,0 +1,332 @@ +#! /usr/bin/env python +from __future__ import print_function +from __future__ import absolute_import +import os +import sys +import unittest +import textwrap +import json +import shutil +import ssl +import irods.test.helpers as helpers +from irods.connection import Connection +from irods.session import iRODSSession +from irods.rule import Rule +from irods.models import User +from socket import gethostname +from irods.password_obfuscation import (encode as pw_encode) +from irods.connection import PlainTextPAMPasswordError +import contextlib +from re import compile as regex +try: + from re import _pattern_type as regex_type +except ImportError: + from re import Pattern as regex_type # Python 3.7+ + + +def json_file_update(fname,keys_to_delete=(),**kw): + j = json.load(open(fname,'r')) + j.update(**kw) + for k in keys_to_delete: + if k in j: del j [k] + elif isinstance(k,regex_type): + jk = [i for i in j.keys() if k.search(i)] + for ky in jk: del j[ky] + with open(fname,'w') as out: + json.dump(j, out, indent=4) + +def env_dir_fullpath(authtype): return os.path.join( os.environ['HOME'] , '.irods.' 
+ authtype) +def json_env_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), 'irods_environment.json') +def secrets_fullpath(authtype): return os.path.join( env_dir_fullpath(authtype), '.irodsA') + +SERVER_ENV_PATH = os.path.expanduser('~irods/.irods/irods_environment.json') + +SERVER_ENV_SSL_SETTINGS = { + "irods_ssl_certificate_chain_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_certificate_key_file": "/etc/irods/ssl/irods.key", + "irods_ssl_dh_params_file": "/etc/irods/ssl/dhparams.pem", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert" +} + +def update_service_account_for_SSL(): + json_file_update( SERVER_ENV_PATH, **SERVER_ENV_SSL_SETTINGS ) + +CLIENT_OPTIONS_FOR_SSL = { + "irods_client_server_policy": "CS_NEG_REQUIRE", + "irods_client_server_negotiation": "request_server_negotiation", + "irods_ssl_ca_certificate_file": "/etc/irods/ssl/irods.crt", + "irods_ssl_verify_server": "cert", + "irods_encryption_key_size": 16, + "irods_encryption_salt_size": 8, + "irods_encryption_num_hash_rounds": 16, + "irods_encryption_algorithm": "AES-256-CBC" +} + + +def client_env_from_server_env(user_name, auth_scheme=""): + cli_env = {} + with open(SERVER_ENV_PATH) as f: + srv_env = json.load(f) + for k in [ "irods_host", "irods_zone_name", "irods_port" ]: + cli_env [k] = srv_env[k] + cli_env["irods_user_name"] = user_name + if auth_scheme: + cli_env["irods_authentication_scheme"] = auth_scheme + return cli_env + +@contextlib.contextmanager +def pam_password_in_plaintext(allow=True): + saved = bool(Connection.DISALLOWING_PAM_PLAINTEXT) + try: + Connection.DISALLOWING_PAM_PLAINTEXT = not(allow) + yield + finally: + Connection.DISALLOWING_PAM_PLAINTEXT = saved + + +class TestLogins(unittest.TestCase): + ''' + This is due to be moved into Jenkins CI along core and other iRODS tests. + Until then, for these tests to run successfully, we require: + 1. First run ./setupssl.py (sets up SSL keys etc. 
in /etc/irods/ssl) + 2. Add & override configuration entries in /var/lib/irods/irods_environment + Per https://slides.com/irods/ugm2018-ssl-and-pam-configuration#/3/7 + 3. Create rodsuser alissa and corresponding unix user with the appropriate + passwords as below. + ''' + + test_rods_user = 'alissa' + + user_auth_envs = { + '.irods.pam': { + 'USER': test_rods_user, + 'PASSWORD': 'test123', # UNIX pw + 'AUTH': 'pam' + }, + '.irods.native': { + 'USER': test_rods_user, + 'PASSWORD': 'apass', # iRODS pw + 'AUTH': 'native' + } + } + + env_save = {} + + @contextlib.contextmanager + def setenv(self,var,newvalue): + try: + self.env_save[var] = os.environ.get(var,None) + os.environ[var] = newvalue + yield newvalue + finally: + oldvalue = self.env_save[var] + if oldvalue is None: + del os.environ[var] + else: + os.environ[var]=oldvalue + + @classmethod + def create_env_dirs(cls): + dirs = {} + retval = [] + # -- create environment configurations and secrets + with pam_password_in_plaintext(): + for dirname,lookup in cls.user_auth_envs.items(): + if lookup['AUTH'] == 'pam': + ses = iRODSSession( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + authentication_scheme=lookup['AUTH'], + password=lookup['PASSWORD'], + port= 1247 ) + try: + pam_hashes = ses.pam_pw_negotiated + except AttributeError: + pam_hashes = [] + if not pam_hashes: print('Warning ** PAM pw couldnt be generated' ); break + scrambled_pw = pw_encode( pam_hashes[0] ) + #elif lookup['AUTH'] == 'XXXXXX': # TODO: insert other authentication schemes here + elif lookup['AUTH'] in ('native', '',None): + scrambled_pw = pw_encode( lookup['PASSWORD'] ) + cl_env = client_env_from_server_env(cls.test_rods_user) + if lookup.get('AUTH',None) is not None: # - specify auth scheme only if given + cl_env['irods_authentication_scheme'] = lookup['AUTH'] + dirbase = os.path.join(os.environ['HOME'],dirname) + dirs[dirbase] = { 'secrets':scrambled_pw , 'client_environment':cl_env } + + # -- create the environment 
directories and write into them the configurations just created + for absdir in dirs.keys(): + shutil.rmtree(absdir,ignore_errors=True) + os.mkdir(absdir) + with open(os.path.join(absdir,'irods_environment.json'),'w') as envfile: + envfile.write('{}') + json_file_update(envfile.name, **dirs[absdir]['client_environment']) + with open(os.path.join(absdir,'.irodsA'),'wb') as secrets_file: + secrets_file.write(dirs[absdir]['secrets']) + os.chmod(secrets_file.name,0o600) + + retval = dirs.keys() + return retval + + + @staticmethod + def get_server_ssl_negotiation( session ): + + rule_body = textwrap.dedent(''' + test { *out=""; acPreConnect(*out); + writeLine("stdout", "*out"); + } + ''') + myrule = Rule(session, body=rule_body, params={}, output='ruleExecOut') + out_array = myrule.execute() + buf = out_array.MsParam_PI[0].inOutStruct.stdoutBuf.buf.decode('utf-8') + eol_offset = buf.find('\n') + return buf[:eol_offset] if eol_offset >= 0 else None + + @classmethod + def setUpClass(cls): + cls.admin = helpers.make_session() + if cls.test_rods_user in (row[User.name] for row in cls.admin.query(User.name)): + cls.server_ssl_setting = cls.get_server_ssl_negotiation( cls.admin ) + cls.envdirs = cls.create_env_dirs() + if not cls.envdirs: + raise RuntimeError('Could not create one or more client environments') + + @classmethod + def tearDownClass(cls): + for envdir in getattr(cls, 'envdirs', []): + shutil.rmtree(envdir, ignore_errors=True) + cls.admin.cleanup() + + def setUp(self): + if not getattr(self, 'envdirs', []): + self.skipTest('The test_rods_user "{}" does not exist'.format(self.test_rods_user)) + super(TestLogins,self).setUp() + + def tearDown(self): + super(TestLogins,self).tearDown() + + def validate_session(self, session, verbose=False, **options): + + # - try to get the home collection + home_coll = '/{0.zone}/home/{0.username}'.format(session) + self.assertTrue(session.collections.get(home_coll).path == home_coll) + if verbose: print(home_coll) + # - check user 
is as expected + self.assertEqual( session.username, self.test_rods_user ) + # - check socket type (normal vs SSL) against whether ssl requested + use_ssl = options.pop('ssl',None) + if use_ssl is not None: + my_connect = [s for s in (session.pool.active|session.pool.idle)] [0] + self.assertEqual( bool( use_ssl ), my_connect.socket.__class__ is ssl.SSLSocket ) + + +# def test_demo(self): self.demo() + +# def demo(self): # for future reference - skipping based on CS_NEG_DONT_CARE setting +# if self.server_ssl_setting == 'CS_NEG_DONT_CARE': +# self.skipTest('skipping b/c setting is DONT_CARE') +# self.assertTrue (False) + + + def tst0(self, ssl_opt, auth_opt, env_opt ): + auth_opt_explicit = 'native' if auth_opt=='' else auth_opt + verbosity=False + #verbosity='' # -- debug - sanity check by printing out options applied + out = {'':''} + if env_opt: + with self.setenv('IRODS_ENVIRONMENT_FILE', json_env_fullpath(auth_opt_explicit)) as env_file,\ + self.setenv('IRODS_AUTHENTICATION_FILE', secrets_fullpath(auth_opt_explicit)): + cli_env_extras = {} if not(ssl_opt) else dict( CLIENT_OPTIONS_FOR_SSL ) + if auth_opt: + cli_env_extras.update( irods_authentication_scheme = auth_opt ) + remove=[] + else: + remove=[regex('authentication_')] + with helpers.file_backed_up(env_file): + json_file_update( env_file, keys_to_delete=remove, **cli_env_extras ) + session = iRODSSession(irods_env_file=env_file) + out = json.load(open(env_file)) + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='no' + else: + session_options = {} + if auth_opt: + session_options.update (authentication_scheme = auth_opt) + if ssl_opt: + SSL_cert = CLIENT_OPTIONS_FOR_SSL["irods_ssl_ca_certificate_file"] + session_options.update( + ssl_context = ssl.create_default_context ( purpose = ssl.Purpose.SERVER_AUTH, + capath = None, + cadata = None, + cafile = SSL_cert), + **CLIENT_OPTIONS_FOR_SSL ) + lookup = self.user_auth_envs ['.irods.'+('native' if 
not(auth_opt) else auth_opt)] + session = iRODSSession ( host=gethostname(), + user=lookup['USER'], + zone='tempZone', + password=lookup['PASSWORD'], + port= 1247, + **session_options ) + out = session_options + self.validate_session( session, verbose = verbosity, ssl = ssl_opt ) + session.cleanup() + out['ARGS']='yes' + + if verbosity == '': + print ('--- ssl:',ssl_opt,'/ auth:',repr(auth_opt),'/ env:',env_opt) + print ('--- > ',json.dumps({k:v for k,v in out.items() if k != 'ssl_context'},indent=4)) + print ('---') + + # == test defaulting to 'native' + + def test_01(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = False ) + def test_02(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = False ) + def test_03(self): + self.tst0 ( ssl_opt = True , auth_opt = '' , env_opt = True ) + def test_04(self): + self.tst0 ( ssl_opt = False, auth_opt = '' , env_opt = True ) + + # == test explicit scheme 'native' + + def test_1(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = False ) + + def test_2(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = False ) + + def test_3(self): + self.tst0 ( ssl_opt = True , auth_opt = 'native' , env_opt = True ) + + def test_4(self): + self.tst0 ( ssl_opt = False, auth_opt = 'native' , env_opt = True ) + + # == test explicit scheme 'pam' + + def test_5(self): + self.tst0 ( ssl_opt = True, auth_opt = 'pam' , env_opt = False ) + + def test_6(self): + try: + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = False ) + except PlainTextPAMPasswordError: + pass + else: + # -- no exception raised + self.fail("PlainTextPAMPasswordError should have been raised") + + def test_7(self): + self.tst0 ( ssl_opt = True , auth_opt = 'pam' , env_opt = True ) + + def test_8(self): + self.tst0 ( ssl_opt = False, auth_opt = 'pam' , env_opt = True ) + + +if __name__ == '__main__': + # let the tests find the parent irods lib + sys.path.insert(0, os.path.abspath('../..')) + unittest.main() 
diff --git a/irods/test/meta_test.py b/irods/test/meta_test.py index 49fd24f..8954920 100644 --- a/irods/test/meta_test.py +++ b/irods/test/meta_test.py @@ -4,8 +4,9 @@ import os import sys import unittest -from irods.meta import iRODSMeta -from irods.models import DataObject, Collection +from irods.meta import (iRODSMeta, AVUOperation, BadAVUOperationValue, BadAVUOperationKeyword) +from irods.manager.metadata_manager import InvalidAtomicAVURequest +from irods.models import (DataObject, Collection, Resource) import irods.test.helpers as helpers from six.moves import range @@ -19,7 +20,6 @@ class TestMeta(unittest.TestCase): def setUp(self): self.sess = helpers.make_session() - # test data self.coll_path = '/{}/home/{}/test_dir'.format(self.sess.zone, self.sess.username) self.obj_name = 'test1' @@ -29,13 +29,91 @@ def setUp(self): self.coll = self.sess.collections.create(self.coll_path) self.obj = self.sess.data_objects.create(self.obj_path) - def tearDown(self): '''Remove test data and close connections ''' self.coll.remove(recurse=True, force=True) + helpers.remove_unused_metadata(self.sess) self.sess.cleanup() + from irods.test.helpers import create_simple_resc_hierarchy + + def test_atomic_metadata_operations_244(self): + user = self.sess.users.get("rods") + group = self.sess.user_groups.get("public") + m = ( "attr_244","value","units") + + with self.assertRaises(BadAVUOperationValue): + AVUOperation(operation="add", avu=m) + + with self.assertRaises(BadAVUOperationValue): + AVUOperation(operation="not_add_or_remove", avu=iRODSMeta(*m)) + + with self.assertRaises(BadAVUOperationKeyword): + AVUOperation(operation="add", avu=iRODSMeta(*m), extra_keyword=None) + + + with self.assertRaises(InvalidAtomicAVURequest): + user.metadata.apply_atomic_operations( tuple() ) + + user.metadata.apply_atomic_operations() # no AVUs applied - no-op without error + + for n,obj in enumerate((group, user, self.coll, self.obj)): + avus = [ 
iRODSMeta('some_attribute',str(i),'some_units') for i in range(n*100,(n+1)*100) ] + obj.metadata.apply_atomic_operations(*[AVUOperation(operation="add", avu=avu_) for avu_ in avus]) + obj.metadata.apply_atomic_operations(*[AVUOperation(operation="remove", avu=avu_) for avu_ in avus]) + + + def test_atomic_metadata_operation_for_resource_244(self): + (root,leaf)=('ptX','rescX') + with self.create_simple_resc_hierarchy(root,leaf): + root_resc = self.sess.resources.get(root) # resource objects + leaf_resc = self.sess.resources.get(leaf) + root_tuple = ('role','root','new units #1') # AVU tuples to apply + leaf_tuple = ('role','leaf','new units #2') + root_resc.metadata.add( *root_tuple[:2] ) # first apply without units ... + leaf_resc.metadata.add( *leaf_tuple[:2] ) + for resc,resc_tuple in ((root_resc, root_tuple), (leaf_resc, leaf_tuple)): + resc.metadata.apply_atomic_operations( # metadata set operation (remove + add) to add units + AVUOperation(operation="remove", avu=iRODSMeta(*resc_tuple[:2])), + AVUOperation(operation="add", avu=iRODSMeta(*resc_tuple[:3])) + ) + resc_meta = self.sess.metadata.get(Resource, resc.name) + avus_to_tuples = lambda avu_list: sorted([(i.name,i.value,i.units) for i in avu_list]) + self.assertEqual(avus_to_tuples(resc_meta), avus_to_tuples([iRODSMeta(*resc_tuple)])) + + + def test_atomic_metadata_operation_for_data_object_244(self): + AVUs_Equal = lambda avu1,avu2,fn=(lambda x:x): fn(avu1)==fn(avu2) + AVU_As_Tuple = lambda avu,length=3:(avu.name,avu.value,avu.units)[:length] + AVU_Units_String = lambda avu:"" if not avu.units else avu.units + m = iRODSMeta( "attr_244","value","units") + self.obj.metadata.add(m) + meta = self.sess.metadata.get(DataObject, self.obj_path) + self.assertEqual(len(meta), 1) + self.assertTrue(AVUs_Equal(m,meta[0],AVU_As_Tuple)) + self.obj.metadata.apply_atomic_operations( # remove original AVU and replace + AVUOperation(operation="remove",avu=m), # with two altered versions + 
AVUOperation(operation="add",avu=iRODSMeta(m.name,m.value,"units_244")), # (one of them without units) ... + AVUOperation(operation="add",avu=iRODSMeta(m.name,m.value)) + ) + meta = self.sess.metadata.get(DataObject, self.obj_path) # ... check integrity of change + self.assertEqual(sorted([AVU_Units_String(i) for i in meta]), ["","units_244"]) + + def test_atomic_metadata_operations_255(self): + my_resc = self.sess.resources.create('dummyResc','passthru') + avus = [iRODSMeta('a','b','c'), iRODSMeta('d','e','f')] + objects = [ self.sess.users.get("rods"), self.sess.user_groups.get("public"), my_resc, + self.sess.collections.get(self.coll_path), self.sess.data_objects.get(self.obj_path) ] + try: + for obj in objects: + self.assertEqual(len(obj.metadata.items()), 0) + for n,item in enumerate(avus): + obj.metadata.apply_atomic_operations(AVUOperation(operation='add',avu=item)) + self.assertEqual(len(obj.metadata.items()), n+1) + finally: + for obj in objects: obj.metadata.remove_all() + my_resc.remove() def test_get_obj_meta(self): # get object metadata @@ -44,6 +122,19 @@ def test_get_obj_meta(self): # there should be no metadata at this point assert len(meta) == 0 + def test_resc_meta(self): + rescname = 'demoResc' + self.sess.resources.get(rescname).metadata.remove_all() + self.sess.metadata.set(Resource, rescname, iRODSMeta('zero','marginal','cost')) + self.sess.metadata.add(Resource, rescname, iRODSMeta('zero','marginal')) + self.sess.metadata.set(Resource, rescname, iRODSMeta('for','ever','after')) + meta = self.sess.resources.get(rescname).metadata + self.assertTrue( len(meta) == 3 ) + resource = self.sess.resources.get(rescname) + all_AVUs= resource.metadata.items() + for avu in all_AVUs: + resource.metadata.remove(avu) + self.assertTrue(0 == len(self.sess.resources.get(rescname).metadata)) def test_add_obj_meta(self): # add metadata to test object diff --git a/irods/test/pool_test.py b/irods/test/pool_test.py index 0f38ff8..3b28da4 100644 --- 
a/irods/test/pool_test.py +++ b/irods/test/pool_test.py @@ -1,15 +1,51 @@ #! /usr/bin/env python from __future__ import absolute_import +import datetime import os +import re import sys +import time +import json import unittest import irods.test.helpers as helpers +# Regular expression to match common synonyms for localhost. +# + +LOCALHOST_REGEX = re.compile(r"""^(127(\.\d+){1,3}|[0:]+1|(.*-)?localhost(\.\w+)?)$""",re.IGNORECASE) +USE_ONLY_LOCALHOST = False + class TestPool(unittest.TestCase): + config_extension = ".json" + test_extension = "" + preferred_parameters = {} + + @classmethod + def setUpClass(cls): # generate test env files using connect data from ~/.irods environment + if USE_ONLY_LOCALHOST: return + Nonlocal_Ext = ".test" + with helpers.make_session() as session: + cls.preferred_parameters = { 'irods_host':session.host, + 'irods_port':session.port, + 'irods_user_name':session.username, + 'irods_zone_name':session.zone } + test_configs_dir = os.path.join(irods_test_path(),"test-data") + for config in [os.path.join(test_configs_dir,f) for f in os.listdir(test_configs_dir) + if f.endswith(cls.config_extension)]: + with open(config,"r") as in_, open(config + Nonlocal_Ext,"w") as out_: + cf = json.load(in_) + cf.update(cls.preferred_parameters) + json.dump(cf, out_,indent=4) + cls.test_extension = Nonlocal_Ext + + def setUp(self): - self.sess = helpers.make_session() + self.sess = helpers.make_session( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment.json" + self.test_extension)) + if USE_ONLY_LOCALHOST and not LOCALHOST_REGEX.match (self.sess.host): + self.skipTest('for non-local server') def tearDown(self): '''Close connections @@ -17,7 +53,7 @@ def tearDown(self): self.sess.cleanup() def test_release_connection(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -34,7 +70,7 @@ def 
test_destroy_active(self): self.assertEqual(0, len(self.sess.pool.idle)) def test_destroy_idle(self): - with self.sess.pool.get_connection() as conn: + with self.sess.pool.get_connection(): self.assertEqual(1, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) @@ -58,6 +94,178 @@ def test_release_disconnected(self): self.assertEqual(0, len(self.sess.pool.active)) self.assertEqual(0, len(self.sess.pool.idle)) + def test_connection_create_time(self): + # Get a connection and record its object ID and create_time + # Release the connection (goes from active to idle queue) + # Again, get a connection. Should get the same connection back. + # I.e., the object IDs should match. However, the new connection + # should have a more recent 'last_used_time' + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertTrue(last_used_time_2 >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + 
self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. This means any connection + # that was created more than 3 seconds ago will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait more than 'irods_connection_refresh_time' seconds, + # which is set to 3. 
Connection object should have a different + # object ID (as a new connection is created) + time.sleep(5) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertNotEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(create_time_2 > create_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + def test_no_refresh_connection(self): + # Set 'irods_connection_refresh_time' to '3' (in seconds) in + # ~/.irods/irods_environment.json file. This means any connection + # created more than 3 seconds ago will be dropped and + # a new connection is created/returned. This is to avoid + # issue with idle connections that are dropped. + conn_obj_id_1 = None + conn_obj_id_2 = None + create_time_1 = None + create_time_2 = None + last_used_time_1 = None + last_used_time_2 = None + + with self.sess.pool.get_connection() as conn: + conn_obj_id_1 = id(conn) + curr_time = datetime.datetime.now() + create_time_1 = conn.create_time + last_used_time_1 = conn.last_used_time + self.assertTrue(curr_time >= create_time_1) + self.assertTrue(curr_time >= last_used_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(1, len(self.sess.pool.idle)) + + # Wait less than 'irods_connection_refresh_time' seconds, + # which is set to 3. 
Connection object should have the same + # object ID (as idle time is less than 'irods_connection_refresh_time') + time.sleep(1) + + with self.sess.pool.get_connection() as conn: + conn_obj_id_2 = id(conn) + curr_time = datetime.datetime.now() + create_time_2 = conn.create_time + last_used_time_2 = conn.last_used_time + self.assertTrue(curr_time >= create_time_2) + self.assertTrue(curr_time >= last_used_time_2) + self.assertEqual(conn_obj_id_1, conn_obj_id_2) + self.assertTrue(create_time_2 >= create_time_1) + self.assertEqual(1, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + self.sess.pool.release_connection(conn, True) + self.assertEqual(0, len(self.sess.pool.active)) + self.assertEqual(0, len(self.sess.pool.idle)) + + + def test_get_connection_refresh_time_no_env_file_input_param(self): + connection_refresh_time = self.sess.get_connection_refresh_time(first_name="Magic", last_name="Johnson") + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_none_existant_env_file(self): + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_non_existant.json" + self.test_extension)) + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_no_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_no_refresh_field.json" + self.test_extension)) + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time_negative_connection_refresh_field(self): + connection_refresh_time = self.sess.get_connection_refresh_time( + irods_env_file=os.path.join(irods_test_path(),"test-data","irods_environment_negative_refresh_field.json" + self.test_extension)) + self.assertEqual(connection_refresh_time, -1) + + def test_get_connection_refresh_time(self): + 
default_path = os.path.join (irods_test_path(),"test-data","irods_environment.json" + self.test_extension) + connection_refresh_time = self.sess.get_connection_refresh_time(irods_env_file=default_path) + self.assertEqual(connection_refresh_time, 3) + + +def irods_test_path(): + return os.path.dirname(__file__) + if __name__ == '__main__': # let the tests find the parent irods lib diff --git a/irods/test/query_test.py b/irods/test/query_test.py index d9a5f98..5b81134 100644 --- a/irods/test/query_test.py +++ b/irods/test/query_test.py @@ -1,19 +1,67 @@ #! /usr/bin/env python +# -*- coding: utf-8 -*- +from __future__ import print_function from __future__ import absolute_import import os +import six import sys +import tempfile import unittest +import time +import uuid from datetime import datetime -from irods.models import User, Collection, DataObject, Resource +from irods.models import (User, UserMeta, + Resource, ResourceMeta, + Collection, CollectionMeta, + DataObject, DataObjectMeta, + RuleExec) + +from tempfile import NamedTemporaryFile from irods.exception import MultipleResultsFound, CAT_UNKNOWN_SPECIFIC_QUERY, CAT_INVALID_ARGUMENT from irods.query import SpecificQuery -from irods.column import Like, Between +from irods.column import Like, Between, In +from irods.meta import iRODSMeta +from irods.rule import Rule from irods import MAX_SQL_ROWS +from irods.test.helpers import irods_shared_reg_resc_vault import irods.test.helpers as helpers +from six.moves import range as py3_range +import irods.keywords as kw + +IRODS_STATEMENT_TABLE_SIZE = 50 + + +def rows_returned(query): + return len( list(query) ) class TestQuery(unittest.TestCase): + Iterate_to_exhaust_statement_table = range(IRODS_STATEMENT_TABLE_SIZE + 1) + + More_than_one_batch = 2*MAX_SQL_ROWS # may need to increase if PRC default page + # size is increased beyond 500 + + register_resc = '' + + @classmethod + def setUpClass(cls): + with helpers.make_session() as sess: + resource_name = 
helpers.get_register_resource(sess) + if resource_name: + cls.register_resc = resource_name + + @classmethod + def tearDownClass(cls): + with helpers.make_session() as sess: + try: + if cls.register_resc: + sess.resources.get(cls.register_resc).remove() + except Exception as e: + print( "Could not remove resc {!r} due to: {} ".format(cls.register_resc,e), + file=sys.stderr) + + def setUp(self): self.sess = helpers.make_session() @@ -26,14 +74,12 @@ def setUp(self): self.coll = self.sess.collections.create(self.coll_path) self.obj = self.sess.data_objects.create(self.obj_path) - def tearDown(self): '''Remove test data and close connections ''' self.coll.remove(recurse=True, force=True) self.sess.cleanup() - def test_collections_query(self): # collection query test result = self.sess.query(Collection.id, Collection.name).all() @@ -145,6 +191,20 @@ def test_query_order_by_invalid_param(self): results = self.sess.query(User.name).order_by( User.name, order='moo').all() + def test_query_order_by_col_not_in_result__183(self): + test_collection_size = 8 + test_collection_path = '/{0}/home/{1}/testcoln_for_col_not_in_result'.format(self.sess.zone, self.sess.username) + c1 = c2 = None + try: + c1 = helpers.make_test_collection( self.sess, test_collection_path+"1", obj_count=test_collection_size) + c2 = helpers.make_test_collection( self.sess, test_collection_path+"2", obj_count=test_collection_size) + d12 = [ sorted([d.id for d in c.data_objects]) for c in sorted((c1,c2),key=lambda c:c.id) ] + query = self.sess.query(DataObject).filter(Like(Collection.name, test_collection_path+"_")).order_by(Collection.id) + q12 = list(map(lambda res:res[DataObject.id], query)) + self.assertTrue(d12[0] + d12[1] == sorted( q12[:test_collection_size] ) + sorted( q12[test_collection_size:])) + finally: + if c1: c1.remove(recurse=True,force=True) + if c2: c2.remove(recurse=True,force=True) def test_query_with_like_condition(self): '''Equivalent to: @@ -154,7 +214,6 @@ def 
test_query_with_like_condition(self): query = self.sess.query(Resource).filter(Like(Resource.name, 'dem%')) self.assertIn('demoResc', [row[Resource.name] for row in query]) - def test_query_with_between_condition(self): '''Equivalent to: iquest "select RESC_NAME, COLL_NAME, DATA_NAME where DATA_MODIFY_TIME between '01451606400' '...'" @@ -171,6 +230,316 @@ def test_query_with_between_condition(self): res_str = '{} {}/{}'.format(result[Resource.name], result[Collection.name], result[DataObject.name]) self.assertIn(session.zone, res_str) + def test_query_with_in_condition(self): + collection = self.coll_path + filename = 'test_query_id_in_list.txt' + file_path = '{collection}/{filename}'.format(**locals()) + obj1 = helpers.make_object(self.sess, file_path+'-1') + obj2 = helpers.make_object(self.sess, file_path+'-2') + ids = [x.id for x in (obj1,obj2)] + for number in range(3): # slice for empty(:0), first(:1) or both(:2) + search_tuple = (ids[:number] if number >= 1 else [0] + ids[:number]) + q = self.sess.query(DataObject.name).filter(In( DataObject.id, search_tuple )) + self.assertEqual (number, rows_returned(q)) + + def test_simultaneous_multiple_AVU_joins(self): + objects = [] + decoys = [] + try: + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + for x in range(3,9): + obj = helpers.make_object(self.sess, file_path+'-{}'.format(x)) # with metadata + objects.append(obj) + obj.metadata.add('A_meta','1{}'.format(x)) + obj.metadata.add('B_meta','2{}'.format(x)) + decoys.append(helpers.make_object(self.sess, file_path+'-dummy{}'.format(x))) # without metadata + self.assertTrue( len(objects) > 0 ) + + # -- test simple repeat of same column -- + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'A_meta', DataObjectMeta.value < '20').\ + filter(DataObjectMeta.name == 'B_meta', DataObjectMeta.value >= '20') + self.assertTrue( rows_returned(q) == len(objects) ) + 
+ # -- test no-stomp of previous filter -- + self.assertTrue( ('B_meta','28') in [ (x.name,x.value) for x in objects[-1].metadata.items() ] ) + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value < '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(Like(DataObjectMeta.value, '2_')) + self.assertTrue( rows_returned(q) == len(objects)-1 ) + + # -- test multiple AVU's by same attribute name -- + objects[-1].metadata.add('B_meta','29') + q = self.sess.query(DataObject,DataObjectMeta).\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '28').\ + filter(DataObjectMeta.name == 'B_meta').filter(DataObjectMeta.value == '29') + self.assertTrue(rows_returned(q) == 1) + finally: + for x in (objects + decoys): + x.unlink(force=True) + helpers.remove_unused_metadata( self.sess ) + + def test_query_on_AVU_times(self): + test_collection_path = '/{zone}/home/{user}/test_collection'.format( zone = self.sess.zone, user = self.sess.username) + testColl = helpers.make_test_collection(self.sess, test_collection_path, obj_count = 1) + testData = testColl.data_objects[0] + testResc = self.sess.resources.get('demoResc') + testUser = self.sess.users.get(self.sess.username) + objects = { 'r': testResc, 'u': testUser, 'c':testColl, 'd':testData } + object_IDs = { sfx:obj.id for sfx,obj in objects.items() } + tables = { 'r': (Resource, ResourceMeta), + 'u': (User, UserMeta), + 'd': (DataObject, DataObjectMeta), + 'c': (Collection, CollectionMeta) } + try: + str_number_incr = lambda str_numbers : str(1+max([0]+[int(n) if n.isdigit() else 0 for n in str_numbers])) + AVU_unique_incr = lambda obj,suffix='' : ( 'a_'+suffix, + 'v_'+suffix, + str_number_incr(avu.units for avu in obj.metadata.items()) ) + before = datetime.utcnow() + time.sleep(1.5) + for suffix,obj in objects.items(): obj.metadata.add( *AVU_unique_incr(obj,suffix) ) + after = datetime.utcnow() + for suffix, tblpair in tables.items(): + 
self.sess.query( *tblpair ).filter(tblpair[1].modify_time <= after )\ + .filter(tblpair[1].modify_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + self.sess.query( *tblpair ).filter(tblpair[1].create_time <= after )\ + .filter(tblpair[1].create_time > before )\ + .filter(tblpair[0].id == object_IDs[suffix] ).one() + finally: + for obj in objects.values(): + for avu in obj.metadata.items(): obj.metadata.remove(avu) + testColl.remove(recurse=True,force=True) + helpers.remove_unused_metadata( self.sess ) + + + def test_multiple_criteria_on_one_column_name(self): + collection = self.coll_path + filename = 'test_multiple_AVU_joins' + file_path = '{collection}/{filename}'.format(**locals()) + objects = [] + nobj = 0 + for x in range(3,9): + nobj += 2 + obj1 = helpers.make_object(self.sess, file_path+'-{}'.format(x)) + obj2 = helpers.make_object(self.sess, file_path+'-dummy{}'.format(x)) + objects.extend([obj1,obj2]) + self.assertTrue( nobj > 0 and len(objects) == nobj ) + q = self.sess.query(Collection,DataObject) + dummy_test = [d for d in q if d[DataObject.name][-1:] != '8' + and d[DataObject.name][-7:-1] == '-dummy' ] + self.assertTrue( len(dummy_test) > 0 ) + q = q. 
filter(Like(DataObject.name, '%-dummy_')).\ + filter(Collection.name == collection) .\ + filter(DataObject.name != (filename + '-dummy8')) + results = [r[DataObject.name] for r in q] + self.assertTrue(len(results) == len(dummy_test)) + + + def common_dir_or_vault_info(self): + register_opts= {} + dir_ = None + if self.register_resc: + dir_ = irods_shared_reg_resc_vault() + register_opts[ kw.RESC_NAME_KW ] = self.register_resc + if not(dir_) and helpers.irods_session_host_local (self.sess): + dir_ = tempfile.gettempdir() + if not dir_: + return () + else: + return (dir_ , register_opts) + + + @unittest.skipIf(six.PY3, 'Test is for python2 only') + def test_query_for_data_object_with_utf8_name_python2(self): + reg_info = self.common_dir_or_vault_info() + if not reg_info: + self.skipTest('server is non-localhost and no common path exists for object registration') + (dir_,resc_option) = reg_info + filename_prefix = '_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + self.assertEqual(self.FILENAME_PREFIX.encode('utf-8'), filename_prefix) + _,test_file = tempfile.mkstemp(dir=dir_,prefix=filename_prefix) + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + results = None + try: + self.sess.data_objects.register(test_file, obj_path, **resc_option) + results = self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file).first() + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path.decode('utf8')) + self.assertEqual(result_physical_path, test_file.decode('utf8')) + finally: + if results: self.sess.data_objects.unregister(obj_path) + os.remove(test_file) + + # view/change this line in text editors under own risk: + FILENAME_PREFIX = u'_prefix_ǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸ' + + @unittest.skipIf(six.PY2, 'Test is for python3 only') + def test_query_for_data_object_with_utf8_name_python3(self): + reg_info = 
self.common_dir_or_vault_info() + if not reg_info: + self.skipTest('server is non-localhost and no common path exists for object registration') + (dir_,resc_option) = reg_info + def python34_unicode_mkstemp( prefix, dir = None, open_mode = 0o777 ): + file_path = os.path.join ((dir or os.environ.get('TMPDIR') or '/tmp'), prefix+'-'+str(uuid.uuid1())) + encoded_file_path = file_path.encode('utf-8') + return os.open(encoded_file_path,os.O_CREAT|os.O_RDWR,mode=open_mode), encoded_file_path + fd = None + filename_prefix = u'_prefix_'\ + u'\u01e0\u01e1\u01e2\u01e3\u01e4\u01e5\u01e6\u01e7\u01e8\u01e9\u01ea\u01eb\u01ec\u01ed\u01ee\u01ef'\ + u'\u01f0\u01f1\u01f2\u01f3\u01f4\u01f5\u01f6\u01f7\u01f8' # make more visible/changeable in VIM + self.assertEqual(self.FILENAME_PREFIX, filename_prefix) + (fd,encoded_test_file) = tempfile.mkstemp(dir = dir_.encode('utf-8'),prefix=filename_prefix.encode('utf-8')) \ + if sys.version_info >= (3,5) \ + else python34_unicode_mkstemp(dir = dir_, prefix = filename_prefix) + self.assertTrue(os.path.exists(encoded_test_file)) + test_file = encoded_test_file.decode('utf-8') + obj_path = os.path.join(self.coll.path, os.path.basename(test_file)) + results = None + try: + self.sess.data_objects.register(test_file, obj_path, **resc_option) + results = list(self.sess.query(DataObject, Collection.name).filter(DataObject.path == test_file)) + if results: + results = results[0] + result_logical_path = os.path.join(results[Collection.name], results[DataObject.name]) + result_physical_path = results[DataObject.path] + self.assertEqual(result_logical_path, obj_path) + self.assertEqual(result_physical_path, test_file) + finally: + if results: self.sess.data_objects.unregister(obj_path) + if fd is not None: os.close(fd) + os.remove(encoded_test_file) + + class Issue_166_context: + ''' + For [irods/python-irodsclient#166] related tests + ''' + + def __init__(self, session, coll_path='test_collection_issue_166', num_objects=8, num_avus_per_object=0): + 
self.session = session + if '/' not in coll_path: + coll_path = '/{}/home/{}/{}'.format(self.session.zone, self.session.username, coll_path) + self.coll_path = coll_path + self.num_objects = num_objects + self.test_collection = None + self.nAVUs = num_avus_per_object + + def __enter__(self): # - prepare for context block ("with" statement) + + self.test_collection = helpers.make_test_collection( self.session, self.coll_path, obj_count=self.num_objects) + q_params = (Collection.name, DataObject.name) + + if self.nAVUs > 0: + + # - set the AVUs on the collection's objects: + for data_obj_path in map(lambda d:d[Collection.name]+"/"+d[DataObject.name], + self.session.query(*q_params).filter(Collection.name == self.test_collection.path)): + data_obj = self.session.data_objects.get(data_obj_path) + for key in (str(x) for x in py3_range(self.nAVUs)): + data_obj.metadata[key] = iRODSMeta(key, "1") + + # - in subsequent test searches, match on each AVU of every data object in the collection: + q_params += (DataObjectMeta.name,) + + # - The "with" statement receives, as context variable, a zero-arg function to build the query + return lambda : self.session.query( *q_params ).filter( Collection.name == self.test_collection.path) + + def __exit__(self,*_): # - clean up after context block + + if self.test_collection is not None: + self.test_collection.remove(recurse=True, force=True) + + if self.nAVUs > 0 and self.num_objects > 0: + helpers.remove_unused_metadata(self.session) # delete unused AVU's + + def test_query_first__166(self): + + with self.Issue_166_context(self.sess) as buildQuery: + for dummy_i in self.Iterate_to_exhaust_statement_table: + buildQuery().first() + + def test_query_one__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + query = buildQuery() + try: + query.one() + except MultipleResultsFound: + pass # irrelevant result + + def 
test_query_one_iter__166(self): + + with self.Issue_166_context(self.sess, num_objects = self.More_than_one_batch) as buildQuery: + + for dummy_i in self.Iterate_to_exhaust_statement_table: + + for dummy_row in buildQuery(): + break # single iteration + + def test_paging_get_batches_and_check_paging__166(self): + + with self.Issue_166_context( self.sess, num_objects = 1, + num_avus_per_object = 2 * self.More_than_one_batch) as buildQuery: + + pages = [b for b in buildQuery().get_batches()] + self.assertTrue(len(pages) > 2 and len(pages[0]) < self.More_than_one_batch) + + to_compare = [] + + for _ in self.Iterate_to_exhaust_statement_table: + + for batch in buildQuery().get_batches(): + to_compare.append(batch) + if len(to_compare) == 2: break #leave query unfinished, but save two pages to compare + + # - To make sure paging was done, we ensure that this "key" tuple (collName/dataName , metadataKey) + # is not repeated between first two pages: + + Compare_Key = lambda d: ( d[Collection.name] + "/" + d[DataObject.name], + d[DataObjectMeta.name] ) + Set0 = { Compare_Key(dct) for dct in to_compare[0] } + Set1 = { Compare_Key(dct) for dct in to_compare[1] } + self.assertTrue(len(Set0 & Set1) == 0) # assert intersection is null set + + def test_paging_get_results__166(self): + + with self.Issue_166_context( self.sess, num_objects = self.More_than_one_batch) as buildQuery: + batch_size = 0 + for result_set in buildQuery().get_batches(): + batch_size = len(result_set) + break + + self.assertTrue(0 < batch_size < self.More_than_one_batch) + + for dummy_iter in self.Iterate_to_exhaust_statement_table: + iters = 0 + for dummy_row in buildQuery().get_results(): + iters += 1 + if iters == batch_size - 1: + break # leave iteration unfinished + + def test_rules_query__267(self): + unique = "Testing prc #267: queryable rule objects" + with NamedTemporaryFile(mode='w') as rfile: + rfile.write("""f() {{ delay('1m') {{ writeLine('serverLog','{unique}') }} }}\n""" + """OUTPUT 
null\n""".format(**locals())) + rfile.flush() + ## create a delayed rule we can query against + myrule = Rule(self.sess, rule_file = rfile.name) + myrule.execute() + qu = self.sess.query(RuleExec.id).filter( Like(RuleExec.frequency,'%1m%'), + Like(RuleExec.name, '%{unique}%'.format(**locals())) ) + results = [row for row in qu] + self.assertEqual(1, len(results)) + if results: + Rule(self.sess).remove_by_id( results[0][RuleExec.id] ) + class TestSpecificQuery(unittest.TestCase): @@ -192,7 +561,7 @@ def test_query_data_name_and_id(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' columns = [DataObject.name, DataObject.id] query = SpecificQuery(self.session, sql, alias, columns) @@ -225,7 +594,7 @@ def test_query_data_name_and_id_no_columns(self): self.session, test_collection_path, obj_count=test_collection_size) # make specific query - sql = "select data_name, data_id from r_data_main join r_coll_main using (coll_id) where coll_name = '{test_collection_path}'".format(**locals()) + sql = "select DATA_NAME, DATA_ID from R_DATA_MAIN join R_COLL_MAIN using (COLL_ID) where COLL_NAME = '{test_collection_path}'".format(**locals()) alias = 'list_data_name_id' query = SpecificQuery(self.session, sql, alias) @@ -246,7 +615,7 @@ def test_query_data_name_and_id_no_columns(self): def test_register_query_twice(self): - query = SpecificQuery(self.session, sql='select data_name from r_data_main', alias='list_data_names') + query = SpecificQuery(self.session, sql='select DATA_NAME from R_DATA_MAIN', alias='list_data_names') # register query query.register() @@ -261,7 +630,6 @@ def test_register_query_twice(self): # 
remove query query.remove() - def test_list_specific_queries(self): query = SpecificQuery(self.session, alias='ls') @@ -270,7 +638,15 @@ def test_list_specific_queries(self): self.assertIn('SELECT', result[1].upper()) # query string - def test_list_specific_queries_with_wrong_alias(self): + def test_list_specific_queries_with_arguments(self): + query = SpecificQuery(self.session, alias='lsl', args=['%OFFSET%']) + + for result in query: + self.assertIsNotNone(result[0]) # query alias + self.assertIn('SELECT', result[1].upper()) # query string + + + def test_list_specific_queries_with_unknown_alias(self): query = SpecificQuery(self.session, alias='foo') with self.assertRaises(CAT_UNKNOWN_SPECIFIC_QUERY): @@ -327,6 +703,7 @@ def test_multiple_criteria_on_one_column_name(self): self.assertTrue(len(results) == len(dummy_test)) + if __name__ == '__main__': # let the tests find the parent irods lib sys.path.insert(0, os.path.abspath('../..')) diff --git a/irods/test/rule_test.py b/irods/test/rule_test.py index dfcc624..233cdd4 100644 --- a/irods/test/rule_test.py +++ b/irods/test/rule_test.py @@ -10,6 +10,7 @@ import irods.test.helpers as helpers from irods.rule import Rule import six +from io import open as io_open class TestRule(unittest.TestCase): @@ -157,11 +158,8 @@ def test_retrieve_std_streams_from_rule(self): INPUT *some_string="{some_string}",*some_other_string="{some_other_string}",*err_string="{err_string}" OUTPUT ruleExecOut'''.format(**locals())) - with open(rule_file_path, "w") as rule_file: - if six.PY2: - rule_file.write(rule.encode('utf-8')) - else: - rule_file.write(rule) + with io_open(rule_file_path, "w", encoding='utf-8') as rule_file: + rule_file.write(rule) # run test rule myrule = Rule(session, rule_file_path) diff --git a/irods/test/setupssl.py b/irods/test/setupssl.py new file mode 100755 index 0000000..aab6bd1 --- /dev/null +++ b/irods/test/setupssl.py @@ -0,0 +1,50 @@ +#!/usr/bin/env python + +from __future__ import print_function +import os 
+import sys +import socket +import posix +import shutil +from subprocess import (Popen, PIPE) + +IRODS_SSL_DIR = '/etc/irods/ssl' + +def create_ssl_dir(): + save_cwd = os.getcwd() + silent_run = { 'shell': True, 'stderr' : PIPE, 'stdout' : PIPE } + try: + if not (os.path.exists(IRODS_SSL_DIR)): + os.mkdir(IRODS_SSL_DIR) + os.chdir(IRODS_SSL_DIR) + Popen("openssl genrsa -out irods.key 2048",**silent_run).communicate() + with open("/dev/null","wb") as dev_null: + p = Popen("openssl req -new -x509 -key irods.key -out irods.crt -days 365 <".format(**vars(self)) + diff --git a/irods_consortium_continuous_integration_test_module.py b/irods_consortium_continuous_integration_test_module.py new file mode 100644 index 0000000..c8ef414 --- /dev/null +++ b/irods_consortium_continuous_integration_test_module.py @@ -0,0 +1,25 @@ +import json +import sys + +def run (CI): + + final_config = CI.store_config( + { + "yaml_substitutions": { # -> written to ".env" + "python_version" : "3", + "client_os_generic": "ubuntu", + "client_os_image": "ubuntu:18.04" + }, + "container_environments": { + "client-runner" : { # -> written to "client-runner.env" + "TESTS_TO_RUN": "" # run test subset, e.g. 
"irods.test.data_obj_test" + } + + } + } + ) + + print ('----------\nconfig after CI modify pass\n----------',file=sys.stderr) + print(json.dumps(final_config,indent=4),file=sys.stderr) + + return CI.run_and_wait_on_client_exit () diff --git a/run_python_tests.sh b/run_python_tests.sh new file mode 100644 index 0000000..5ec2207 --- /dev/null +++ b/run_python_tests.sh @@ -0,0 +1,13 @@ +#!/bin/bash + +set -o pipefail +cd repo/irods/test + +export PYTHONUNBUFFERED="Y" + +if [ -z "${TESTS_TO_RUN}" ] ; then + python"${PY_N}" runner.py 2>&1 | tee "${LOG_OUTPUT_DIR}"/prc_test_logs.txt +else + python"${PY_N}" -m unittest -v ${TESTS_TO_RUN} 2>&1 | tee "${LOG_OUTPUT_DIR}"/prc_test_logs.txt +fi + diff --git a/setup.py b/setup.py index 159f46a..6108534 100644 --- a/setup.py +++ b/setup.py @@ -21,6 +21,7 @@ author_email='support@irods.org', description='A python API for iRODS', long_description=long_description, + long_description_content_type='text/x-rst', license='BSD', url='https://github.com/irods/python-irodsclient', keywords='irods', @@ -39,6 +40,11 @@ 'six>=1.10.0', 'PrettyTable>=0.7.2', 'xmlrunner>=1.7.7', - 'humanize' - ] + 'humanize', + 'xmlrunner>=1.7.7' + # - the new syntax: + #'futures; python_version == "2.7"' + ], + # - the old syntax: + extras_require={ ':python_version == "2.7"': ['futures'] } )