Difference between revisions of "IRODS: Advanced Configurations"
| Line 90: | Line 90: | ||
== Restaging Tiered Data == | == Restaging Tiered Data == | ||
| − | + | When the user requests for the file, the contents will be returned and the file moved to the mimimum_restaging_tier or the lowest tier if none has been specified. | |
<nowiki> | <nowiki> | ||
$ imeta add -R medium_resc irods::storage_tiering::minimum_restage_tier true | $ imeta add -R medium_resc irods::storage_tiering::minimum_restage_tier true | ||
| − | |||
</nowiki> | </nowiki> | ||
Revision as of 09:06, 13 July 2020
Storage Tiering
Install Storage tiering plugin
$ sudo yum install irods-rule-engine-plugin-storage-tiering
Add the following entries to the "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.
{
"instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
"plugin_name": "irods_rule_engine_plugin-storage_tiering",
"plugin_specific_configuration": {
"data_transfer_log_level" : "LOG_NOTICE"
}
},
{
"instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
"plugin_name": "irods_rule_engine_plugin-apply_access_time",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_verification-instance",
"plugin_name": "irods_rule_engine_plugin-data_verification",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_replication-instance",
"plugin_name": "irods_rule_engine_plugin-data_replication",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_movement-instance",
"plugin_name": "irods_rule_engine_plugin-data_movement",
"plugin_specific_configuration": {
}
},
Creating Storage resources
$ iadmin mkresc fast_resc unixfilesystem `hostname`:/tmp/irods/fast_resc
$ iadmin mkresc medium_resc unixfilesystem `hostname`:/tmp/irods/medium_resc
$ iadmin mkresc slow_resc unixfilesystem `hostname`:/tmp/irods/slow_resc
Creating a Tier Group
Set position of created resource in a tiering hierarchy
$ imeta add -R fast_resc irods::storage_tiering::group example_group 0
$ imeta add -R medium_resc irods::storage_tiering::group example_group 1
$ imeta add -R slow_resc irods::storage_tiering::group example_group 2
Setting Tiering Policy
Set how long (in seconds) a file can stay in one tier before being moved to the next tier
$ imeta add -R fast_resc irods::storage_tiering::time 120
$ imeta add -R medium_resc irods::storage_tiering::time 300
Launch tiering policy with delay engine
Create a rule file with any name and a '.r' extension (for example, example_tiering_invocation.r) with the following contents. This rule will be re-invoked indefinitely, at an interval of 1 second.
{
"rule-engine-instance-name": "irods_rule_engine_plugin-storage_tiering-instance",
"rule-engine-operation": "irods_policy_schedule_storage_tiering",
"delay-parameters": "<INST_NAME>irods_rule_engine_plugin-storage_tiering-instance</INST_NAME><PLUSET>1s</PLUSET><EF>REPEAT FOR EVER</EF>",
"storage-tier-groups": [
"example_group_g2",
"example_group"
]
}
INPUT null
OUTPUT ruleExecOut
Now, load the created ruleset in the IRODS delay engine.
$ irule -r irods_rule_engine_plugin-storage_tiering-instance -F example_tiering_invocation.r
The rules currently loaded in the delay engine can be seen with the 'iqstat' command.
Restaging Tiered Data
When a user requests a file, its contents are returned and the file is moved to the resource tagged with irods::storage_tiering::minimum_restage_tier, or to the lowest tier if no such resource has been specified.
$ imeta add -R medium_resc irods::storage_tiering::minimum_restage_tier true
Test Storage Tiering
Write a file 'foo' to fast_resc. After the configured time for each tier has elapsed, the file will move from fast_resc to medium_resc, and then to slow_resc. This movement can be observed with the command 'ils -l'.
$ iput -R fast_resc foo
$ ils -l
$ iget foo
IRODS Rules
Install IRODS Python plugin
$ sudo yum install irods-rule-engine-plugin-python-4.2.7
Add the following entry to the "rule_engines" array in /etc/irods/server_config.json, above the "irods_rule_engine_plugin-irods_rule_language" plugin entry.
{
"instance_name": "irods_rule_engine_plugin-python-instance",
"plugin_name": "irods_rule_engine_plugin-python",
"plugin_specific_configuration": {}
}
The complete server_config.json with storage tiering and python rule engine plugin enabled looks like this:
{
"advanced_settings": {
"default_log_rotation_in_days": 5,
"default_number_of_transfer_threads": 4,
"default_temporary_password_lifetime_in_seconds": 120,
"maximum_number_of_concurrent_rule_engine_server_processes": 4,
"maximum_size_for_single_buffer_in_megabytes": 32,
"maximum_temporary_password_lifetime_in_seconds": 1000,
"rule_engine_server_execution_time_in_seconds": 120,
"rule_engine_server_sleep_time_in_seconds": 30,
"transfer_buffer_size_for_parallel_transfer_in_megabytes": 4,
"transfer_chunk_size_for_parallel_transfer_in_megabytes": 40
},
"catalog_provider_hosts": [
"irods.novalocal"
],
"catalog_service_role": "provider",
"client_api_whitelist_policy": "enforce",
"default_dir_mode": "0750",
"default_file_mode": "0600",
"default_hash_scheme": "SHA256",
"default_resource_name": "demoResc",
"environment_variables": {},
"federation": [],
"match_hash_policy": "compatible",
"negotiation_key": "abcdefghijklmnopqrstuvwxyzabcdef",
"plugin_configuration": {
"authentication": {},
"database": {
"postgres": {
"db_host": "localhost",
"db_name": "ICAT",
"db_odbc_driver": "PostgreSQL",
"db_password": "testpassword",
"db_port": 5432,
"db_username": "irods"
}
},
"network": {},
"resource": {},
"rule_engines": [
{
"instance_name": "irods_rule_engine_plugin-storage_tiering-instance",
"plugin_name": "irods_rule_engine_plugin-storage_tiering",
"plugin_specific_configuration": {
"data_transfer_log_level" : "LOG_NOTICE"
}
},
{
"instance_name": "irods_rule_engine_plugin-apply_access_time-instance",
"plugin_name": "irods_rule_engine_plugin-apply_access_time",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_verification-instance",
"plugin_name": "irods_rule_engine_plugin-data_verification",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_replication-instance",
"plugin_name": "irods_rule_engine_plugin-data_replication",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-data_movement-instance",
"plugin_name": "irods_rule_engine_plugin-data_movement",
"plugin_specific_configuration": {
}
},
{
"instance_name": "irods_rule_engine_plugin-python-instance",
"plugin_name": "irods_rule_engine_plugin-python",
"plugin_specific_configuration": {}
},
{
"instance_name": "irods_rule_engine_plugin-irods_rule_language-instance",
"plugin_name": "irods_rule_engine_plugin-irods_rule_language",
"plugin_specific_configuration": {
"re_data_variable_mapping_set": [
"core"
],
"re_function_name_mapping_set": [
"core"
],
"re_rulebase_set": [
"training",
"core"
],
"regexes_for_supported_peps": [
"ac[^ ]*",
"msi[^ ]*",
"[^ ]*pep_[^ ]*_(pre|post|except|finally)"
]
},
"shared_memory_instance": "irods_rule_language_rule_engine"
},
{
"instance_name": "irods_rule_engine_plugin-cpp_default_policy-instance",
"plugin_name": "irods_rule_engine_plugin-cpp_default_policy",
"plugin_specific_configuration": {}
}
]
},
"rule_engine_namespaces": [
""
],
"schema_name": "server_config",
"schema_validation_base_uri": "file:///var/lib/irods/configuration_schemas",
"schema_version": "v3",
"server_control_plane_encryption_algorithm": "AES-256-CBC",
"server_control_plane_encryption_num_hash_rounds": 16,
"server_control_plane_key": "abcdefghijklmnopqrstuvwxyzabcdef",
"server_control_plane_port": 1248,
"server_control_plane_timeout_milliseconds": 10000,
"server_port_range_end": 20199,
"server_port_range_start": 20000,
"xmsg_port": 1279,
"zone_auth_scheme": "native",
"zone_key": "zone_key",
"zone_name": "tempZone",
"zone_port": 1247,
"zone_user": "rods"
}
Write rules
By default, IRODS provides a framework for writing rules in a C++ style language. In addition, rules can be written in Python 2.7 using the Python plugin. Detailed information about the rule engine framework can be found at:
- https://docs.irods.org/4.2.7/plugins/pluggable_rule_engine/
- https://docs.irods.org/4.2.7/plugins/irods_rule_language/
- https://docs.irods.org/4.2.7/plugins/dynamic_policy_enforcement_points/
Moreover, in both the C++ and Python rule engines, rules can be made to run as static rules or as dynamic PEPs (Policy Enforcement Points).
In the following code section, a static rule is written in Python to convert any CSV file to JSON format whenever it is written to IRODS.
# /etc/irods/core.py
import os
import session_vars
import sys
import io
import json
import csv
from StringIO import StringIO
def convert_CSV_to_JSON(csv_text):
    """Convert CSV text into a pretty-printed JSON array of row objects.

    The first CSV line is used as the header. Every following line becomes
    one JSON object whose keys are the header names.

    Args:
        csv_text: Complete contents of the CSV file as a single string.

    Returns:
        A JSON string (4-space indent) containing a list with one object per
        data row. Empty or header-only input yields "[]".
    """
    # Function-scope import: keeps the file's top-level import list untouched.
    from collections import OrderedDict

    reader = csv.DictReader(StringIO(csv_text))
    # fieldnames is None when there is no header line; no rows follow then.
    columns = reader.fieldnames or []
    # OrderedDict keeps the CSV column order in the JSON output; a plain dict
    # has arbitrary key order on Python 2.7. Only header columns are copied,
    # so surplus cells in over-long rows are dropped (as before).
    csv_rows = [
        OrderedDict((name, row[name]) for name in columns)
        for row in reader
    ]
    return json.dumps(csv_rows, sort_keys=False, indent=4, separators=(',', ': '))
def acPostProcForPut(rule_args, callback, rei):
    """Write a JSON copy of every '.csv' data object after it has been put.

    The object path is read from the session variables. If it ends in '.csv',
    the object is opened and read in full, converted with
    convert_CSV_to_JSON, and the result is written to a sibling object whose
    '.csv' extension is replaced by '.json'.

    Args:
        rule_args: Rule arguments supplied by the rule engine (unused here).
        callback: Rule engine callback object used to invoke microservices.
        rei: Rule execution info, passed to session_vars.get_map.

    Returns:
        None. Objects that do not end in '.csv' are ignored.
    """
    # irods_types is used below but is not among this file's imports.
    # NOTE(review): assumes the Python rule engine plugin exposes it as an
    # importable module - confirm against the plugin documentation.
    import irods_types

    csv_suffix = '.csv'

    # Reading session variables
    sv = session_vars.get_map(rei)
    objpath = sv['data_object']['object_path']

    # Checking the extension of file being written
    if not objpath.endswith(csv_suffix):
        return
    callback.writeLine('serverLog', str(sv))

    # Swap only the trailing extension: str.replace would also rewrite a
    # '.csv' that appears earlier in the path (e.g. in a collection name).
    json_path = objpath[:-len(csv_suffix)] + '.json'

    # Opening csv file to read
    # NOTE(review): the resource name is hard-coded to demoResc - confirm this
    # matches the resource the object was actually put to.
    oflags = "objPath={filename}++++rescName=demoResc++++replNum=0++++openFlags=O_RDONLY".format(filename=objpath)
    callback.writeLine('serverLog', 'oflags = ' + oflags)
    ret_val = callback.msiDataObjOpen(oflags, 0)
    file_desc = ret_val['arguments'][1]
    try:
        # Getting size of file to read
        ret_val = callback.msiObjStat(objpath, irods_types.RodsObjStat())
        source_file_stat = ret_val['arguments'][1]
        callback.writeLine('serverLog', 'objstat = ' + str(source_file_stat))
        length = source_file_stat.objSize

        # Reading data from file
        ret_val = callback.msiDataObjRead(file_desc, length, irods_types.BytesBuf())
        read_buf = ret_val['arguments'][2]
        callback.writeLine('serverLog', str(ret_val))
        read_data = ''.join(read_buf.buf)
    finally:
        # Close the source descriptor even if stat/read raised.
        callback.msiDataObjClose(file_desc, 0)

    # Converting csv to json
    data = convert_CSV_to_JSON(read_data)

    # Creating json file to write
    ret_val = callback.msiDataObjCreate(json_path, "destRescName=demoResc++++forceFlag=", 0)
    file_desc_b = ret_val['arguments'][2]
    try:
        callback.msiDataObjWrite(file_desc_b, bytes(data), 0)
    finally:
        # Close the destination descriptor even if the write raised.
        callback.msiDataObjClose(file_desc_b, 0)
Testing
Write any '.csv' file to IRODS; a corresponding '.json' file should be created next to it.
$ iput d.csv
$ ils -l
/tempZone/home/rods:
  rods 0 demoResc 1220939 2020-07-13.08:12 & d.csv
  rods 0 demoResc 3843622 2020-07-13.08:12 & d.json