# (c) 2012-2016 Inria by David Margery (david.margery@inria.fr) in the context of the Grid'5000 project
# Licenced under the CeCILL B licence.
#
# All-in-one API tutorial script. It:
#   1. looks on every site for an existing job of this user carrying this
#      script's job name, so that the job is not submitted twice;
#   2. otherwise searches for a free node with a single storage device and
#      submits a 30 minute job on it;
#   3. waits up to 30 seconds for the job to be running;
#   4. connects to the node over ssh (through a gateway when needed), runs a
#      series of stress commands, and records when each one was started;
#   5. fetches the cpu_user, cpu_system and mem_free timeseries of the node and
#      prints them interleaved with the recorded events.
#
# NOTE(review): `root` is not defined in this file; it is expected to be
# provided by the environment that evaluates the script (presumably the root
# resource of a Restfully session -- confirm).

require 'date'   # DateTime.parse
require 'pp'     # pp (needs an explicit require on older rubies)
require 'socket' # Socket.gethostname
require 'net/ssh'

g5k_login = ENV["USER"]
job_name = "Running the API all in one Tutorial to learn to reserve nodes #{File.basename($0)}"

# Look for a previously submitted job of the same name,
# to avoid submitting twice.
my_job = nil
my_site = nil
root.sites.each do |site|
  break if my_job # already found on a previous site, no need to query the others

  begin
    site.jobs(:query => { :user => g5k_login, :name => job_name }).each do |job|
      next unless job["name"] == job_name && job['state'] != 'error'

      my_job = job
      my_site = site
      break
    end
  rescue Restfully::HTTP::ServerError
    puts "Site #{site["uid"]} unreachable"
  end
end

if my_job.nil?
  # We fall back to looking for available resources.
  suitable_nodes = []
  root.sites.each do |site|
    begin
      # Status of the nodes of this site, fetched lazily (only once there is at
      # least one interesting node) and at most once per site.
      nodes_status = nil
      site.clusters.each do |cluster|
        cluster.nodes.each do |node|
          next unless node["storage_devices"].size == 1

          nodes_status ||= site.status["nodes"]
          status = nodes_status["#{node["uid"]}.#{site["uid"]}.grid5000.fr"]
          # Skip nodes that have no status entry instead of crashing on nil.
          next unless status && status["soft"] == "free"

          reservations = status["reservations"]
          # Keep the node if nothing is reserved on it, or if the first listed
          # reservation is scheduled at least 3000 seconds from now.
          if reservations.empty? || Time.at(reservations[0]["scheduled_at"]) - Time.now >= 3000
            suitable_nodes << { :node => node, :cluster => cluster, :site => site }
          else
            puts "#{node["uid"]} is free but not available long enough"
          end
        end
      end
    rescue Restfully::HTTP::ServerError
      puts "Could not access information from #{site["uid"]}"
    end
  end

  elected_node = suitable_nodes.pop
  if elected_node
    elected_fqdn = "#{elected_node[:node]["uid"]}.#{elected_node[:site]["uid"]}.grid5000.fr"
    puts "Attempt to create a job on #{elected_fqdn}"
    begin
      my_site = elected_node[:site]
      my_job = my_site.jobs.submit(
        :resources => "nodes=1,walltime=00:30:00",
        :properties => "network_address in ('#{elected_fqdn}')",
        :command => "sleep 3600",
        :types => ["allow_classic_ssh"],
        :name => job_name
      )
    rescue Restfully::HTTP::ServerError => e
      # Submission failed: my_job stays nil. Show why, together with the
      # current state and next reservations of the node.
      status = elected_node[:node].status(:query => { :reservations_limit => '5' })
      puts e.message
      puts "#{status["system_state"]}"
      pp status["reservations"]
      puts "Could not get a job on #{elected_node[:node]["uid"]}. Please retry on another node"
    end
  end
end

if my_job
  my_job.reload
  puts "Found job #{my_job["uid"]} in state #{my_job["state"]}"
  puts " expected to start at #{Time.at(my_job["scheduled_at"])}" if my_job["scheduled_at"]

  # Poll the job once per second, for at most 30 seconds, until it is running.
  wait_time = 0
  while my_job.reload['state'] != "running" && wait_time < 30
    sleep 1
    wait_time += 1
    print '.'
  end

  if my_job['state'] == "running"
    puts "running on node #{my_job["assigned_nodes"]}. Will do something with this job"
    fqdn = my_job["assigned_nodes"][0]
    host = fqdn.split('.')[0]
    # The cluster name is the part of the host name before the first dash.
    cluster = host.match(/(\w+)-.*/)[1]
    node = my_site.clusters[cluster.to_sym].nodes[host.to_sym]
    threads = node["architecture"]["nb_threads"]

    events = {} # epoch seconds => description of what happened at that time
    start = nil
    gw = nil
    ssh = nil
    begin
      if Socket.gethostname !~ /grid5000\.fr/
        require 'net/ssh/gateway'
        # Need to connect to the node through a gateway
        # A lot here depends on your ssh config
        # useful options are
        #  * :keys_only => true to use specified keys before keys offered by your ssh-agent
        #  * :verbose => :debug to see why the connection fails
        #  * :keys => ["private_key_file to use"]
        #  * :config => false to bypass your ssh_config file
        puts " created a gateway for the ssh connexion"
        gw = Net::SSH::Gateway.new('access.grid5000.fr', g5k_login)
      end

      if gw
        puts " connecting to #{fqdn} through gateway"
        ssh = gw.ssh(fqdn, g5k_login)
      else
        ssh = Net::SSH.start(fqdn, g5k_login)
      end

      # Stress the node a bit to see the impact on consumption.
      puts " running date"
      start = DateTime.parse(ssh.exec!('date'))
      cmd_time = 240
      cmds = [
        "stress -t #{cmd_time} -c #{threads}",
        "stress -t #{cmd_time} -i #{threads}",
        "stress -t #{cmd_time} -m #{threads}",
        "stress -t #{cmd_time} -c #{threads} -i #{threads} -m #{threads}",
        "sleep #{cmd_time}"
      ]
      cmds.each do |cmd|
        puts " running #{cmd} on node"
        # Timestamps come from the node's own clock (remote `date`).
        events[DateTime.parse(ssh.exec!('date')).to_time.to_i] = "Now running #{cmd}"
        ssh.exec!(cmd)
      end
    ensure
      # Always release the connections, even when a remote command fails.
      # The ssh session goes first, as it may be tunnelled through the gateway.
      ssh.close if ssh && !ssh.closed?
      gw.shutdown! if gw
    end

    # Get the values from this experiment.
    [:cpu_user, :cpu_system, :mem_free].each do |metric|
      data_desc = my_site.metrics[metric]
      series = data_desc.timeseries(:query => { :resolution => 15, :from => start.to_time.to_i })[host.to_sym]
      sample_timestamp = series["from"]
      sample_resolution = series["resolution"]
      puts "Got values from #{Time.at(sample_timestamp)} at a resolution of a value every #{sample_resolution}s for #{metric}"
      series["values"].each do |sample|
        events[sample_timestamp] ||= ""
        events[sample_timestamp] += " #{sample}% #{metric} measured" if [:cpu_user, :cpu_system].include?(metric)
        events[sample_timestamp] += " #{sample} Bytes of #{metric} measured" if [:mem_free, :mem_cache].include?(metric)
        # Consecutive samples are sample_resolution seconds apart.
        sample_timestamp += sample_resolution
      end
    end

    # Print commands and measurements in chronological order.
    events.keys.sort.each do |timestamp|
      puts "#{Time.at(timestamp)}: #{events[timestamp]}"
    end
  end
end