IRODS: Advanced Configurations

From Define Wiki
Revision as of 08:53, 13 July 2020 by Vipul (talk | contribs)
Jump to navigation Jump to search

Storage Tiering

Install Storage tiering plugin

$ sudo yum install irods-rule-engine-plugin-storage-tiering

Add the following entry in "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.

{
            "instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
            "plugin_name": "irods_rule_engine_plugin-storage_tiering",
            "plugin_specific_configuration": {
    		 "data_transfer_log_level" : "LOG_NOTICE"
              }
           },

           {
        "instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
        "plugin_name": "irods_rule_engine_plugin-apply_access_time",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_verification-instance",
        "plugin_name": "irods_rule_engine_plugin-data_verification",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_replication-instance",
        "plugin_name": "irods_rule_engine_plugin-data_replication",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_movement-instance",
        "plugin_name": "irods_rule_engine_plugin-data_movement",
        "plugin_specific_configuration": {
        }
    },


Creating Storage resources

$ iadmin mkresc fast_resc unixfilesystem `hostname`:/tmp/irods/fast_resc
$ iadmin mkresc medium_resc unixfilesystem `hostname`:/tmp/irods/medium_resc
$ iadmin mkresc slow_resc unixfilesystem `hostname`:/tmp/irods/slow_resc

Creating a Tier Group

Set the position of each created resource in the tiering hierarchy

$ imeta add -R fast_resc irods::storage_tiering::group example_group 0
$ imeta add -R medium_resc irods::storage_tiering::group example_group 1
$ imeta add -R slow_resc irods::storage_tiering::group example_group 2

Setting Tiering Policy

Set how long (in seconds) a file can stay in one tier before being moved to the next tier

$ imeta add -R fast_resc irods::storage_tiering::time 120
$ imeta add -R medium_resc irods::storage_tiering::time 300

Launch tiering policy with delay engine

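Create the following rule file with any name, but with a '.r' extension, for example example_tiering_invocation.r. The "storage-tier-groups" array lists the tier groups to process; only "example_group" was created in the steps above.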

{
   "rule-engine-instance-name": "irods_rule_engine_plugin-storage_tiering-instance",
   "rule-engine-operation": "irods_policy_schedule_storage_tiering",
   "delay-parameters": "<INST_NAME>irods_rule_engine_plugin-storage_tiering-instance</INST_NAME><PLUSET>1s</PLUSET><EF>REPEAT FOR EVER</EF>",
   "storage-tier-groups": [
       "example_group_g2",
       "example_group"
   ]
}
INPUT null
OUTPUT ruleExecOut


Now, load the created ruleset in the IRODS delay engine

$ irule -r irods_rule_engine_plugin-storage_tiering-instance -F example_tiering_invocation.r



IRODS Rules

Install IRODS Python plugin

$ sudo yum install irods-rule-engine-plugin-python-4.2.7

Add the following entry in "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.

{
            "instance_name": "irods_rule_engine_plugin-python-instance",
            "plugin_name": "irods_rule_engine_plugin-python",
            "plugin_specific_configuration": {}
},

The complete server_config.json with storage tiering and python rule engine plugin enabled looks like this:

{
    "advanced_settings": {
        "default_log_rotation_in_days": 5, 
        "default_number_of_transfer_threads": 4, 
        "default_temporary_password_lifetime_in_seconds": 120, 
        "maximum_number_of_concurrent_rule_engine_server_processes": 4, 
        "maximum_size_for_single_buffer_in_megabytes": 32, 
        "maximum_temporary_password_lifetime_in_seconds": 1000, 
        "rule_engine_server_execution_time_in_seconds": 120, 
        "rule_engine_server_sleep_time_in_seconds": 30, 
        "transfer_buffer_size_for_parallel_transfer_in_megabytes": 4, 
        "transfer_chunk_size_for_parallel_transfer_in_megabytes": 40
    }, 
    "catalog_provider_hosts": [
        "irods.novalocal"
    ], 
    "catalog_service_role": "provider", 
    "client_api_whitelist_policy": "enforce", 
    "default_dir_mode": "0750", 
    "default_file_mode": "0600", 
    "default_hash_scheme": "SHA256", 
    "default_resource_name": "demoResc", 
    "environment_variables": {}, 
    "federation": [], 
    "match_hash_policy": "compatible", 
    "negotiation_key": "abcdefghijklmnopqrstuvwxyzabcdef", 
    "plugin_configuration": {
        "authentication": {}, 
        "database": {
            "postgres": {
                "db_host": "localhost", 
                "db_name": "ICAT", 
                "db_odbc_driver": "PostgreSQL", 
                "db_password": "testpassword", 
                "db_port": 5432, 
                "db_username": "irods"
            }
        }, 
        "network": {}, 
        "resource": {}, 
        "rule_engines": [
           {
            "instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
            "plugin_name": "irods_rule_engine_plugin-storage_tiering",
            "plugin_specific_configuration": {
    		 "data_transfer_log_level" : "LOG_NOTICE"
              }
           },

           {
        "instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
        "plugin_name": "irods_rule_engine_plugin-apply_access_time",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_verification-instance",
        "plugin_name": "irods_rule_engine_plugin-data_verification",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_replication-instance",
        "plugin_name": "irods_rule_engine_plugin-data_replication",
        "plugin_specific_configuration": {
        }
    },
    {
        "instance_name": "irods_rule_engine_plugin-data_movement-instance",
        "plugin_name": "irods_rule_engine_plugin-data_movement",
        "plugin_specific_configuration": {
        }
    },

     {
                "instance_name": "irods_rule_engine_plugin-python-instance",
                "plugin_name": "irods_rule_engine_plugin-python",
                "plugin_specific_configuration": {}
     },

 
           {
                "instance_name": "irods_rule_engine_plugin-irods_rule_language-instance", 
                "plugin_name": "irods_rule_engine_plugin-irods_rule_language", 
                "plugin_specific_configuration": {
                "re_data_variable_mapping_set": [
                        "core"
                    ], 
                    "re_function_name_mapping_set": [
                        "core"
                    ], 
                    "re_rulebase_set": [
                         "training",
			"core"
                    ], 
                    "regexes_for_supported_peps": [
                        "ac[^ ]*", 
                        "msi[^ ]*", 
                        "[^ ]*pep_[^ ]*_(pre|post|except|finally)"
                    ]

		}, 
                "shared_memory_instance": "irods_rule_language_rule_engine"
            }, 
            {
                "instance_name": "irods_rule_engine_plugin-cpp_default_policy-instance", 
                "plugin_name": "irods_rule_engine_plugin-cpp_default_policy", 
                "plugin_specific_configuration": {}
            }
        ]
    }, 
    "rule_engine_namespaces": [
        ""
    ], 
    "schema_name": "server_config", 
    "schema_validation_base_uri": "file:///var/lib/irods/configuration_schemas", 
    "schema_version": "v3", 
    "server_control_plane_encryption_algorithm": "AES-256-CBC", 
    "server_control_plane_encryption_num_hash_rounds": 16, 
    "server_control_plane_key": "abcdefghijklmnopqrstuvwxyzabcdef", 
    "server_control_plane_port": 1248, 
    "server_control_plane_timeout_milliseconds": 10000, 
    "server_port_range_end": 20199, 
    "server_port_range_start": 20000, 
    "xmsg_port": 1279, 
    "zone_auth_scheme": "native", 
    "zone_key": "zone_key", 
    "zone_name": "tempZone", 
    "zone_port": 1247, 
    "zone_user": "rods"
}


Write rules

By default, iRODS provides a framework for writing rules in its native iRODS Rule Language. On top of this, rules can also be written in Python 2.7 using the Python plugin. Detailed information about the rule engine framework can be found in the official iRODS documentation at https://docs.irods.org/.

Moreover, in both the native and Python rule engines, rules can be made to run as static rules or as dynamic PEPs (Policy Enforcement Points).

In the following code section, a static rule has been written in Python to convert any CSV file to JSON format whenever it is written to iRODS.


# /etc/irods/core.py

import os
import session_vars
import sys
import io
import json
import csv
from StringIO import StringIO



def convert_CSV_to_JSON(csv_text):
    """Convert CSV text into a pretty-printed JSON array of row objects.

    The first CSV line is used as the header. Every following row becomes one
    JSON object keyed by the header names, with the cell values kept as
    strings. Cells beyond the header width are dropped, and a row shorter
    than the header gets null for its missing columns.

    Args:
        csv_text: Full contents of the CSV file as a single string.

    Returns:
        A JSON string (4-space indent) holding a list with one object per
        data row; "[]" when there are no data rows.
    """
    reader = csv.DictReader(StringIO(csv_text))
    columns = reader.fieldnames
    # Pick only the header columns: DictReader stores surplus cells under a
    # None key, which must not leak into the JSON objects.
    csv_rows = [{name: row[name] for name in columns} for row in reader]
    return json.dumps(csv_rows, sort_keys=False, indent=4, separators=(',', ': '))


def acPostProcForPut(rule_args, callback, rei):
    """Create a JSON copy of every '.csv' data object handled by this rule.

    The object path is taken from the session variables. Objects whose path
    does not end in '.csv' are ignored. Otherwise the object is opened
    read-only (replica 0 on demoResc), read in full, converted with
    convert_CSV_to_JSON and written to a data object with the same path but
    a '.json' extension on demoResc (overwriting via forceFlag).

    Args:
        rule_args: Rule arguments supplied by the rule engine (unused here).
        callback: Object used to invoke microservices and write to the
            server log.
        rei: Rule execution info, passed to session_vars.get_map.

    Returns:
        None.
    """
    # irods_types is used below but is not among this file's module-level
    # imports; import it here so the rule does not fail with a NameError.
    import irods_types

    # Reading session variables
    sv = session_vars.get_map(rei)
    objpath = sv['data_object']['object_path']

    # Checking the extension of file being written
    csv_suffix = '.csv'
    if not objpath.endswith(csv_suffix):
        return

    callback.writeLine('serverLog', str(sv))

    # Swap only the trailing extension. str.replace would also rewrite any
    # ".csv" that appears earlier in the path (e.g. in a collection name).
    json_path = objpath[:-len(csv_suffix)] + '.json'

    # Opening csv file to read
    oflags = "objPath={filename}++++rescName=demoResc++++replNum=0++++openFlags=O_RDONLY".format(filename=objpath)
    callback.writeLine('serverLog', 'oflags = ' + oflags)

    ret_val = callback.msiDataObjOpen(oflags, 0)
    file_desc = ret_val['arguments'][1]

    # NOTE(review): presumably a failing microservice call raises; the
    # try/finally blocks make sure opened descriptors are closed in that case.
    try:
        # Getting size of file to read
        ret_val = callback.msiObjStat(objpath, irods_types.RodsObjStat())
        source_file_stat = ret_val['arguments'][1]
        callback.writeLine('serverLog', 'objstat = ' + str(source_file_stat))
        length = source_file_stat.objSize

        # Reading data from file
        ret_val = callback.msiDataObjRead(file_desc, length, irods_types.BytesBuf())
        read_buf = ret_val['arguments'][2]
        callback.writeLine('serverLog', str(ret_val))

        read_data = ''.join(read_buf.buf)

        # Converting csv to json
        data = convert_CSV_to_JSON(read_data)

        # Creating json file to write
        ret_val = callback.msiDataObjCreate(json_path, "destRescName=demoResc++++forceFlag=", 0)
        file_desc_b = ret_val['arguments'][2]

        try:
            callback.msiDataObjWrite(file_desc_b, bytes(data), 0)
        finally:
            # Closing the json file
            callback.msiDataObjClose(file_desc_b, 0)
    finally:
        # Closing the csv file
        callback.msiDataObjClose(file_desc, 0)



Testing

Write any '.csv' file to iRODS, and a corresponding '.json' file should be created next to it.

$ iput d.csv
$ ils -l
/tempZone/home/rods:
  rods              0 demoResc      1220939 2020-07-13.08:12 & d.csv
  rods              0 demoResc      3843622 2020-07-13.08:12 & d.json