Snippet Ruby

With these Ruby snippets, you can free your mind

  1. For Loop

    # Loop over a defined range
    for i in from..to
    end
    # Loop date range
    for date in from..to
    end
    # Loop collection
    collection.each do |item|
    end
    # Loop collection with index
    collection.each_with_index do |item,i|
      if i > 2
        break
      elsif i < 2
        next
      end
    end
    
  2. File

    # create file
    # w: overwrite existing file, or create a new one if it does not exist
    # a: append to existing file, or create a new one if it does not exist
    File.open(file_path, "w") { |f|
      f.write("text line\n")
    }
    # create gzip file
    require 'zlib'
    Zlib::GzipWriter.open(file_path) do |gz|
      gz << "gzip text line\n"
    end
    # create CSV file
    require 'csv'
    CSV.open(file_path, "w") do |csv|
      csv << ["item1","item2","item3","item4","item5"]
    end
    # read the whole file at once
    read_all = File.read(file_path)
    
  3. Directory

    # check whether a directory exists
    File.directory?(dir)
    # working with directory
    require 'fileutils'
    FileUtils.mkdir(dir, options)
    FileUtils.rmdir(dir, options)
    FileUtils.mv(list, dir, options)
    
  4. Date

    # print date with pattern
    require 'date'
    yyyymmdd = Date.today.strftime("%Y%m%d")
    yyyymmddhhmmss = DateTime.now.strftime("%Y%m%d%H%M%S")
    # parse date
    date = DateTime.parse("20140224")
    # calculate date
    yesterday = Date.today - 1
    # get now with local time
    now = Time.now.localtime
    
  5. Numeric

    # create random number
    random = Random.new
    minute = random.rand(10...59)
    # round ceil
    number.ceil
    # round floor
    number.floor
    # get integer value from number
    number.to_i
    
  6. String

    # split string with separator
    string.split("_")
    # check empty string
    string.empty?
    # create random word from dictionary
    # gem install random-word
    # require 'random-word'
    RandomWord.adjs.next
    RandomWord.nouns.next
    RandomWord.phrases.next
    # create random string
    o = [('a'..'z'),('A'..'Z'),('0'..'9')].map{|i|i.to_a}.flatten
    (0...20).map{o[rand(o.length)]}.join
    
  7. Array

    # create an array
    arr = Array.new
    # add item to an array
    arr.push(item)
    # remove item from an array
    arr.delete(item)
    # check whether an item exists in an array
    arr.include?(item)
    # check empty array
    arr.empty?
    # get a random item from an array
    ('a'..'z').to_a.sample
    ["a","0","1","25","b","c"].sample
    [true,false].sample
    
  8. Check encoding UTF-8

    line = "file row content encoding SJIS or UTF-8"
    unless line.valid_encoding?
      line = line.force_encoding("Windows-31J").encode("UTF-8")
    end
    puts line + " has encoding UTF-8"
    
  9. Input and output with screen

    # get input value from screen
    input = gets
    # print a value to screen
    puts "We have param1 = #{param1} and param2 = #{param2}"
    
  10. [MySQL]Ruby MySQL

    # create connection MySQL
    connection = Mysql::new(host, user, password, db_name)
    # query
    result = connection.query("SELECT * FROM table")
    # loop result
    result.each do |item|
      puts item
    end
    # query with Unicode character
    connection = Mysql::new(host, user, password, db_name)
    connection.query('SET NAMES utf8')
    
  11. [AWS][S3]Ruby AWS

    # configure aws
    AWS.config(
      :access_key_id => CONF[:credentials][:access_key_id],
      :secret_access_key => CONF[:credentials][:secret_access_key],
      :region => CONF[:credentials][:region]
    )
    
    # initialize
    emr = AWS::EMR.new
    s3 = AWS::S3.new
    bucket = s3.buckets[bucket_name]
    
    # check job flows status
    emr.job_flows.with_state("STARTING","RUNNING").each do |job|
      puts job.state
    end
    
    # browse objects in s3 directory
    bucket = s3.buckets[CONF[:bucket]]
    bucket.objects.with_prefix(CONF[:bucket][:folder]).collect(&:key).each do |file|
    end
    
    # upload file to s3
    bucket.objects[s3_file_path].write(Pathname.new(local_file_path))
    
    # check file uploaded file success with MD5
    require 'digest/md5'
    # reference https://github.com/aws/aws-sdk-ruby/issues/196
    # first, create checksum for file before upload
    checksum = Digest::MD5.hexdigest File.read(local_file_path)
    # add checksum to metadata of file need upload
    bucket.objects[s3_file_path].write file: local_file_path, metadata: {checksum: checksum}
    # finally, read checksum from s3 object for comparison
    bucket.objects[s3_file_path].metadata[:checksum]
    # we can check file uploaded by attributing content_md5
    # reference http://docs.aws.amazon.com/AWSRubySDK/latest/AWS/S3/S3Object.html#write-instance_method
    
    # create job flow
    # use Client class
    # http://docs.aws.amazon.com/AWSRubySDK/latest/AWS/EMR/Client.html
    bootstrap_actions = Array.new
    bootstrap_actions.push({
      :name => "Configure Daemons",
      :script_bootstrap_action => {
        :path => "s3://elasticmapreduce/bootstrap-actions/configure-daemons",
        :args => ["--namenode-heap-size=2048","--namenode-opts=-XX:GCTimeRatio=19"]
      }
    })
    job_flow = emr.client.run_job_flow(
      :name => "Job Name",
      :ami_version => "latest",
      :log_uri => CONF[:bucket][:log],
      :instances => {
        :instance_count => CONF[:num_instances].to_i,
        :master_instance_type => CONF[:master_instance_type],
        :slave_instance_type => CONF[:slave_instance_type],
       #:ec2_key_name => "key_pair",
       #:keep_job_flow_alive_when_no_steps => true,
       #:hadoop_version => "1.0.3"
      },
      :bootstrap_actions => bootstrap_actions
    )
    
    # add job step to existing job flow
    args = Array.new
    args.push("args1")
    emr.job_flows[job_flow_id].add_steps([{
      :name => "Step Name",
      :action_on_failure => "TERMINATE_JOB_FLOW",
      :hadoop_jar_step => {
        :jar => CONF[:bucket][:jar],
        :main_class => CONF[:main_class],
        :args => (args)
      }
    }])
    
    # setup hadoop debugging
    args = Array.new
    args.push("s3://ap-northeast-1.elasticmapreduce/libs/state-pusher/0.1/fetch")
    emr.job_flows[job_flow_id].add_steps([{
      :name => "Setup Hadoop Debugging",
      :action_on_failure => "TERMINATE_JOB_FLOW",
      :hadoop_jar_step => {
        :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
        :args => (args)
      }
    }])
    
    # setup hive
    args = Array.new
    args.push("s3://ap-northeast-1.elasticmapreduce/libs/hive/hive-script")
    args.push("--base-path")
    args.push("s3://ap-northeast-1.elasticmapreduce/libs/hive/")
    args.push("--install-hive")
    args.push("--hive-versions")
    args.push("latest")
    emr.job_flows[job_flow_id].add_steps([{
      :name => "Setup Hive",
      :action_on_failure => "TERMINATE_JOB_FLOW",
      :hadoop_jar_step => {
        :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
        :args => (args)
      }
    }])
    
    # create job flow simultaneously with setup hadoop debugging and hive
    emr = AWS::EMR.new
    job_flow = emr.client.run_job_flow(
      :name => "Job Name",
      :ami_version => "latest",
      :log_uri => CONF[:bucket][:log],
      :instances => {
        :instance_count => CONF[:num_instances].to_i,
        :master_instance_type => CONF[:master_instance_type],
        :slave_instance_type => CONF[:slave_instance_type]
      },
      :steps => [
        {
        :name => "Setup Hive",
        :action_on_failure => "TERMINATE_JOB_FLOW",
        :hadoop_jar_step => {
          :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
          :args => [
            "s3://ap-northeast-1.elasticmapreduce/libs/hive/hive-script",
            "--base-path",
            "s3://ap-northeast-1.elasticmapreduce/libs/hive/",
            "--install-hive",
            "--hive-versions",
            "latest"
          ]
        }},
        {
        :name => "Setup Hadoop Debugging",
        :action_on_failure => "TERMINATE_JOB_FLOW",
        :hadoop_jar_step => {
          :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
          :args => [
            "s3://ap-northeast-1.elasticmapreduce/libs/state-pusher/0.1/fetch"
          ]
        }}
      ]
    )
    
    # run hive script on existing job flow
    args = Array.new
    args.push("s3://ap-northeast-1.elasticmapreduce/libs/hive/hive-script")
    args.push("--run-hive-script")
    args.push("--hive-versions")
    args.push("latest")
    args.push("--args")
    args.push("-f")
    args.push("s3://bucket/hive/script/script.q")
    args.push("-d")
    args.push("INPUT=s3://bucket/hive/input")
    args.push("-d")
    args.push("OUTPUT=s3://bucket/hive/output")
    emr.job_flows[job_flow_id].add_steps([{
      :name => "Run Hive Script",
      :action_on_failure => "CONTINUE",
      :hadoop_jar_step => {
        :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
        :args => (args)
      }
    }])
    
    # add UDF lib from s3 to existing job flow
    udf_file_path = "s3://bucket/hive/udf/custom_udf.jar"
    emr = AWS::EMR.new
    emr.job_flows[job_flow_id].add_steps([{
      :name => "Add UDF to Hadoop Libs",
      :action_on_failure => "TERMINATE_JOB_FLOW",
      :hadoop_jar_step => {
        :jar => "s3://ap-northeast-1.elasticmapreduce/libs/script-runner/script-runner.jar",
        :args => [
          "s3://ap-northeast-1.elasticmapreduce/libs/hive/hive-script",
          "--run-hive-script",
          "--hive-versions",
          "latest",
          "--args",
          "-e",
          "dfs -get #{udf_file_path} ${env:HADOOP_HOME}/lib/"
        ]
      }
    }])
    

Comments

Popular posts from this blog

Reduce TIME_WAIT Socket Connections