IRODS: Advanced Configurations
Storage Tiering
Storage Tiering
Install Storage tiering plugin
$ sudo yum install irods-rule-engine-plugin-storage-tiering
IRODS Rules
Install IRODS Python plugin
$ sudo yum install irods-rule-engine-plugin-python-4.2.7
Add the following entry to the "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.
{
"instance_name": "irods_rule_engine_plugin-python-instance",
"plugin_name": "irods_rule_engine_plugin-python",
"plugin_specific_configuration": {}
}
The complete server_config.json with storage tiering and python rule engine plugin enabled looks like this:
{
"advanced_settings": {
"default_log_rotation_in_days": 5,
"default_number_of_transfer_threads": 4,
"default_temporary_password_lifetime_in_seconds": 120,
"maximum_number_of_concurrent_rule_engine_server_processes": 4,
"maximum_size_for_single_buffer_in_megabytes": 32,
"maximum_temporary_password_lifetime_in_seconds": 1000,
"rule_engine_server_execution_time_in_seconds": 120,
"rule_engine_server_sleep_time_in_seconds": 30,
"transfer_buffer_size_for_parallel_transfer_in_megabytes": 4,
"transfer_chunk_size_for_parallel_transfer_in_megabytes": 40
},
"catalog_provider_hosts": [
"irods.novalocal"
],
"catalog_service_role": "provider",
"client_api_whitelist_policy": "enforce",
"default_dir_mode": "0750",
"default_file_mode": "0600",
"default_hash_scheme": "SHA256",
"default_resource_name": "demoResc",
"environment_variables": {},
"federation": [],
"match_hash_policy": "compatible",
"negotiation_key": "abcdefghijklmnopqrstuvwxyzabcdef",
"plugin_configuration": {
"authentication": {},
"database": {
"postgres": {
"db_host": "localhost",
"db_name": "ICAT",
"db_odbc_driver": "PostgreSQL",
"db_password": "testpassword",
"db_port": 5432,
"db_username": "irods"
}
},
"network": {},
"resource": {},
"rule_engines": [
{
"instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
"plugin_name": "irods_rule_engine_plugin-storage_tiering",
"plugin_specific_configuration": {
"data_transfer_log_level" : "LOG_NOTICE"
}
},
{
"instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
"plugin_name": "irods_rule_engine_plugin-apply_access_time",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_verification-instance",
"plugin_name": "irods_rule_engine_plugin-data_verification",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_replication-instance",
"plugin_name": "irods_rule_engine_plugin-data_replication",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_movement-instance",
"plugin_name": "irods_rule_engine_plugin-data_movement",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-python-instance",
"plugin_name": "irods_rule_engine_plugin-python",
"plugin_specific_configuration": {}
},
{
"instance_name": "irods_rule_engine_plugin-irods_rule_language-instance",
"plugin_name": "irods_rule_engine_plugin-irods_rule_language",
"plugin_specific_configuration": {
"re_data_variable_mapping_set": [
"core"
],
"re_function_name_mapping_set": [
"core"
],
"re_rulebase_set": [
"training",
"core"
],
"regexes_for_supported_peps": [
"ac[^ ]*",
"msi[^ ]*",
"[^ ]*pep_[^ ]*_(pre|post|except|finally)"
]
},
"shared_memory_instance": "irods_rule_language_rule_engine"
},
{
"instance_name": "irods_rule_engine_plugin-cpp_default_policy-instance",
"plugin_name": "irods_rule_engine_plugin-cpp_default_policy",
"plugin_specific_configuration": {}
}
]
},
"rule_engine_namespaces": [
""
],
"schema_name": "server_config",
"schema_validation_base_uri": "file:///var/lib/irods/configuration_schemas",
"schema_version": "v3",
"server_control_plane_encryption_algorithm": "AES-256-CBC",
"server_control_plane_encryption_num_hash_rounds": 16,
"server_control_plane_key": "abcdefghijklmnopqrstuvwxyzabcdef",
"server_control_plane_port": 1248,
"server_control_plane_timeout_milliseconds": 10000,
"server_port_range_end": 20199,
"server_port_range_start": 20000,
"xmsg_port": 1279,
"zone_auth_scheme": "native",
"zone_key": "zone_key",
"zone_name": "tempZone",
"zone_port": 1247,
"zone_user": "rods"
}
Write rules
By default, iRODS provides a framework for writing rules in a C++-style language (the iRODS Rule Language). In addition, rules can be written in Python 2.7 using the Python plugin. Detailed information about the rule engine framework can be found at:
- https://docs.irods.org/4.2.7/plugins/pluggable_rule_engine/
- https://docs.irods.org/4.2.7/plugins/irods_rule_language/
- https://docs.irods.org/4.2.7/plugins/dynamic_policy_enforcement_points/
Moreover, in both the C++ and Python rule engines, rules can be made to run as static rules or as dynamic PEPs (Policy Enforcement Points).
The following code section defines a static rule (acPostProcForPut) in Python that converts any CSV file to JSON format whenever it is written to iRODS.
# /etc/irods/core.py
import csv
import io
import json
import os
import sys
from collections import OrderedDict
from StringIO import StringIO

import irods_types
import session_vars
def convert_CSV_to_JSON(csv_text):
    """Convert CSV text into a pretty-printed JSON array of row objects.

    The first CSV line is used as the header. Every following row becomes
    one JSON object whose keys are the header names; only header columns
    are emitted, so surplus cells in a row are dropped and missing cells
    are written as null (csv.DictReader's default fill value).

    Args:
        csv_text: The whole CSV document as a single string.

    Returns:
        A JSON string (4-space indent) containing a list with one object
        per data row, or "[]" when there are no data rows.
    """
    reader = csv.DictReader(StringIO(csv_text))
    # fieldnames is None for completely empty input; treat it as no columns.
    fields = reader.fieldnames or []
    # OrderedDict keeps the JSON keys in CSV column order; a plain dict does
    # not guarantee any particular order on Python 2.7.
    csv_rows = [
        OrderedDict((name, row[name]) for name in fields)
        for row in reader
    ]
    return json.dumps(csv_rows, sort_keys=False, indent=4, separators=(',', ': '))
def acPostProcForPut(rule_args, callback, rei):
    """Create a JSON sibling for every '.csv' data object that is put.

    Reads the freshly written CSV object back through the data-object
    microservices, converts it with convert_CSV_to_JSON and writes the
    result under the same path with the trailing '.csv' replaced by
    '.json'. Objects whose path does not end in '.csv' are ignored.

    Args:
        rule_args: Arguments supplied by the rule engine (not used here).
        callback: Handle used to invoke microservices and writeLine.
        rei: Rule execution info from which the session variables are read.

    Returns:
        None.
    """
    csv_ext = '.csv'

    # Reading session variables
    sv = session_vars.get_map(rei)
    objpath = sv['data_object']['object_path']

    # Only objects with a '.csv' extension are converted
    if not objpath.endswith(csv_ext):
        return
    callback.writeLine('serverLog', str(sv))

    # Swap only the trailing extension; str.replace would also rewrite any
    # '.csv' that appears earlier in the path (e.g. in a collection name).
    json_path = objpath[:-len(csv_ext)] + '.json'

    # Opening csv file to read
    # NOTE(review): resource name and replica number are hard-coded; this
    # assumes the object was put to demoResc as replica 0 - confirm.
    oflags = "objPath={filename}++++rescName=demoResc++++replNum=0++++openFlags=O_RDONLY".format(filename=objpath)
    callback.writeLine('serverLog', 'oflags = ' + oflags)
    ret_val = callback.msiDataObjOpen(oflags, 0)
    file_desc = ret_val['arguments'][1]
    try:
        # Getting size of file to read
        ret_val = callback.msiObjStat(objpath, irods_types.RodsObjStat())
        source_file_stat = ret_val['arguments'][1]
        callback.writeLine('serverLog', 'objstat = ' + str(source_file_stat))
        length = source_file_stat.objSize

        # Reading the whole object in one call
        ret_val = callback.msiDataObjRead(file_desc, length, irods_types.BytesBuf())
        read_buf = ret_val['arguments'][2]
        callback.writeLine('serverLog', str(ret_val))
        read_data = ''.join(read_buf.buf)
    finally:
        # Release the source descriptor even if stat/read raised.
        callback.msiDataObjClose(file_desc, 0)

    # Converting csv to json
    data = convert_CSV_to_JSON(read_data)

    # Creating json file to write
    ret_val = callback.msiDataObjCreate(json_path, "destRescName=demoResc++++forceFlag=", 0)
    file_desc_b = ret_val['arguments'][2]
    try:
        callback.msiDataObjWrite(file_desc_b, bytes(data), 0)
    finally:
        # Release the destination descriptor even if the write raised.
        callback.msiDataObjClose(file_desc_b, 0)
Testing
Write any '.csv' file to iRODS; a JSON file with the same base name should be created alongside it.
$ iput d.csv
$ ils -l
/tempZone/home/rods:
  rods              0 demoResc      1220939 2020-07-13.08:12 & d.csv
  rods              0 demoResc      3843622 2020-07-13.08:12 & d.json