IRODS: Advanced Configurations

From Define Wiki
Jump to navigation Jump to search

Storage Tiering

Storage Tiering

Install Storage tiering plugin

$ sudo yum install irods-rule-engine-plugin-storage-tiering

IRODS Rules

Install IRODS Python plugin

$ sudo yum install irods-rule-engine-plugin-python-4.2.7

Add the following entry to the "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.

{
            "instance_name": "irods_rule_engine_plugin-python-instance",
            "plugin_name": "irods_rule_engine_plugin-python",
            "plugin_specific_configuration": {}
}

The complete server_config.json with storage tiering and python rule engine plugin enabled looks like this:

{
    "advanced_settings": {
        "default_log_rotation_in_days": 5, 
        "default_number_of_transfer_threads": 4, 
        "default_temporary_password_lifetime_in_seconds": 120, 
        "maximum_number_of_concurrent_rule_engine_server_processes": 4, 
        "maximum_size_for_single_buffer_in_megabytes": 32, 
        "maximum_temporary_password_lifetime_in_seconds": 1000, 
        "rule_engine_server_execution_time_in_seconds": 120, 
        "rule_engine_server_sleep_time_in_seconds": 30, 
        "transfer_buffer_size_for_parallel_transfer_in_megabytes": 4, 
        "transfer_chunk_size_for_parallel_transfer_in_megabytes": 40
    }, 
    "catalog_provider_hosts": [
        "irods.novalocal"
    ], 
    "catalog_service_role": "provider", 
    "client_api_whitelist_policy": "enforce", 
    "default_dir_mode": "0750", 
    "default_file_mode": "0600", 
    "default_hash_scheme": "SHA256", 
    "default_resource_name": "demoResc", 
    "environment_variables": {}, 
    "federation": [], 
    "match_hash_policy": "compatible", 
    "negotiation_key": "abcdefghijklmnopqrstuvwxyzabcdef", 
    "plugin_configuration": {
        "authentication": {}, 
        "database": {
            "postgres": {
                "db_host": "localhost", 
                "db_name": "ICAT", 
                "db_odbc_driver": "PostgreSQL", 
                "db_password": "testpassword", 
                "db_port": 5432, 
                "db_username": "irods"
            }
        }, 
        "network": {}, 
        "resource": {}, 
        "rule_engines": [
           {
            "instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
            "plugin_name": "irods_rule_engine_plugin-storage_tiering",
            "plugin_specific_configuration": {
    		 "data_transfer_log_level" : "LOG_NOTICE"
              }
           },

           {
        "instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
        "plugin_name": "irods_rule_engine_plugin-apply_access_time",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_verification-instance",
        "plugin_name": "irods_rule_engine_plugin-data_verification",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_replication-instance",
        "plugin_name": "irods_rule_engine_plugin-data_replication",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_movement-instance",
        "plugin_name": "irods_rule_engine_plugin-data_movement",
        "plugin_specific_configuration": {
        }
    },

     {
                "instance_name": "irods_rule_engine_plugin-python-instance",
                "plugin_name": "irods_rule_engine_plugin-python",
                "plugin_specific_configuration": {}
     },

 
           {
                "instance_name": "irods_rule_engine_plugin-irods_rule_language-instance", 
                "plugin_name": "irods_rule_engine_plugin-irods_rule_language", 
                "plugin_specific_configuration": {
                "re_data_variable_mapping_set": [
                        "core"
                    ], 
                    "re_function_name_mapping_set": [
                        "core"
                    ], 
                    "re_rulebase_set": [
                         "training",
			"core"
                    ], 
                    "regexes_for_supported_peps": [
                        "ac[^ ]*", 
                        "msi[^ ]*", 
                        "[^ ]*pep_[^ ]*_(pre|post|except|finally)"
                    ]

		}, 
                "shared_memory_instance": "irods_rule_language_rule_engine"
            }, 
            {
                "instance_name": "irods_rule_engine_plugin-cpp_default_policy-instance", 
                "plugin_name": "irods_rule_engine_plugin-cpp_default_policy", 
                "plugin_specific_configuration": {}
            }
        ]
    }, 
    "rule_engine_namespaces": [
        ""
    ], 
    "schema_name": "server_config", 
    "schema_validation_base_uri": "file:///var/lib/irods/configuration_schemas", 
    "schema_version": "v3", 
    "server_control_plane_encryption_algorithm": "AES-256-CBC", 
    "server_control_plane_encryption_num_hash_rounds": 16, 
    "server_control_plane_key": "abcdefghijklmnopqrstuvwxyzabcdef", 
    "server_control_plane_port": 1248, 
    "server_control_plane_timeout_milliseconds": 10000, 
    "server_port_range_end": 20199, 
    "server_port_range_start": 20000, 
    "xmsg_port": 1279, 
    "zone_auth_scheme": "native", 
    "zone_key": "zone_key", 
    "zone_name": "tempZone", 
    "zone_port": 1247, 
    "zone_user": "rods"
}


Write rules

IRODS provides a framework to write rules in a C++-style language by default. On top of this, rules can also be written in Python 2.7 using the Python plugin. Detailed information about the rule engine framework can be found in the official documentation at https://docs.irods.org.

Moreover, in both the C++ and Python rule engines, rules can be made to run as static rules or as dynamic PEPs (Policy Enforcement Points).

In the following code section, a static rule (acPostProcForPut) is written in Python to convert any CSV file to JSON format whenever it is written to IRODS.


# /etc/irods/core.py

import os
import session_vars
import sys
import io
import json
import csv
from StringIO import StringIO



def convert_CSV_to_JSON(csv_text):
    """Convert CSV text into a pretty-printed JSON array of objects.

    The first CSV row is taken as the header. Every following row becomes
    one JSON object keyed by the header names.

    Only the columns named in the header are emitted: extra trailing values
    in a row are dropped, and values missing from a short row are written
    as ``null`` (csv.DictReader fills them with None).

    :param csv_text: the complete CSV document as a string.
    :return: a JSON string (4-space indent); ``"[]"`` when there are no
             data rows.
    """
    reader = csv.DictReader(StringIO(csv_text))
    # Reading fieldnames consumes the header row; it is None for empty
    # input, in which case the reader yields no rows and the inner
    # comprehension never runs.
    columns = reader.fieldnames
    records = [{name: row[name] for name in columns} for row in reader]
    return json.dumps(records, sort_keys=False, indent=4, separators=(',', ': '))


def acPostProcForPut(rule_args, callback, rei):
    """Static rule: after a '.csv' data object is put, write a '.json' twin.

    The object path is taken from the session variables. Objects whose path
    does not end in '.csv' are ignored. Otherwise the whole object is read,
    converted with convert_CSV_to_JSON() and written to a data object with
    the same path but a '.json' extension (overwriting an existing one,
    since forceFlag is set).

    Both the read and the create use the hard-coded resource 'demoResc'.

    :param rule_args: rule arguments (unused here).
    :param callback: object exposing the iRODS microservices (msi*) and
                     writeLine.
    :param rei: rule execution info, passed to session_vars.get_map().

    NOTE(review): irods_types is referenced below but is not imported in
    this listing; confirm it is available in this module's namespace
    (otherwise add ``import irods_types`` at the top of core.py).
    """
    csv_ext = '.csv'

    # Reading session variables
    sv = session_vars.get_map(rei)
    objpath = sv['data_object']['object_path']

    # Checking the extension of file being written
    if not objpath.endswith(csv_ext):
        return

    callback.writeLine('serverLog', str(sv))

    # Opening csv file to read
    oflags = "objPath={filename}++++rescName=demoResc++++replNum=0++++openFlags=O_RDONLY".format(filename=objpath)
    callback.writeLine('serverLog', 'oflags = ' + oflags)

    ret_val = callback.msiDataObjOpen(oflags, 0)
    file_desc = ret_val['arguments'][1]
    try:
        # Getting size of file to read
        ret_val = callback.msiObjStat(objpath, irods_types.RodsObjStat())
        source_file_stat = ret_val['arguments'][1]
        callback.writeLine('serverLog', 'objstat = ' + str(source_file_stat))
        length = source_file_stat.objSize

        # Reading data from file
        ret_val = callback.msiDataObjRead(file_desc, length, irods_types.BytesBuf())
        read_buf = ret_val['arguments'][2]
        callback.writeLine('serverLog', str(ret_val))

        read_data = ''.join(read_buf.buf)
    finally:
        # Close the source descriptor even if stat/read failed.
        callback.msiDataObjClose(file_desc, 0)

    # Converting csv to json
    data = convert_CSV_to_JSON(read_data)

    # Swap only the trailing extension; str.replace() would also rewrite
    # any '.csv' occurring earlier in the path (e.g. in a collection name).
    json_path = objpath[:-len(csv_ext)] + '.json'

    # Creating json file to write
    ret_val = callback.msiDataObjCreate(json_path, "destRescName=demoResc++++forceFlag=", 0)
    file_desc_b = ret_val['arguments'][2]
    try:
        callback.msiDataObjWrite(file_desc_b, bytes(data), 0)
    finally:
        # Close the destination descriptor even if the write failed.
        callback.msiDataObjClose(file_desc_b, 0)



Testing

Write any '.csv' file to IRODS, and a corresponding '.json' file should be created alongside it.

$ iput d.csv
$ ils -l
/tempZone/home/rods:
  rods              0 demoResc      1220939 2020-07-13.08:12 & d.csv
  rods              0 demoResc      3843622 2020-07-13.08:12 & d.json