#!/usr/bin/env ruby # AdventureWorks for Postgres # by Lorin Thwaits # How to use this file: # Download "Adventure Works 2014 OLTP Script" from: # https://github.com/Microsoft/sql-server-samples/releases/download/adventureworks/AdventureWorks-oltp-install-script.zip # Extract the .zip and copy all of the CSV files into the same folder containing # this update_csvs.rb file and the install.sql file. # Modify the CSVs to work with Postgres by running: # ruby update_csvs.rb # Create the database and tables, import the data, and set up the views and keys with: # psql -c "CREATE DATABASE \"Adventureworks\";" # psql -d Adventureworks < install.sql # (you may need to also add: -U postgres to the above two commands) # All 68 tables are properly set up. # All 20 views are established. # 68 additional convenience views are added which: # * Provide a shorthand to refer to tables. # * Add an "id" column to a primary key or primary-ish key if it makes sense. # For example, with the convenience views you can simply do: # SELECT pe.p.firstname, hr.e.jobtitle # FROM pe.p # INNER JOIN hr.e ON pe.p.id = hr.e.id; # Instead of: # SELECT p.firstname, e.jobtitle # FROM person.person AS p # INNER JOIN humanresources.employee AS e ON p.businessentityid = e.businessentityid; # Schemas for these views: # pe = person # hr = humanresources # pr = production # pu = purchasing # sa = sales # Easily get a list of all of these in psql with: \dv (pe|hr|pr|pu|sa).* # Enjoy! Dir.glob('./*.csv') do |csv_file| f = if (is_needed = csv_file.end_with?('/Address.csv')) File.open(csv_file, "rb:WINDOWS-1252:UTF-8") else File.open(csv_file, "rb:UTF-16LE:UTF-8") end output = "" text = "" is_first = true is_pipes = false begin f.each do |line| if is_first if line.include?("+|") is_pipes = true end if line[0] == "\uFEFF" line = line[1..-1] is_needed = true end end is_first = false break if !is_needed if is_pipes if line.strip.end_with?("&|") text << line.gsub("|474946383961", "|\\\\x474946383961") # For GIF data .gsub(/\"/, "\"\"") .strip[0..-3] output << text.split("+|").map { |part| (part[1] == "<" && part[-1] == ">") ? '"' + part[1..-1] + '"' : (part.include?("\t") ? '"' + part + '"' : part) }.join("\t") output << "\n" text = "" else text << line.gsub(/\"/, "\"\"").gsub("\r\n", "\\n") end else output << line.gsub(/\"/, "\"\"").gsub(/\&\|\n/, "\n").gsub(/\&\|\r\n/, "\n") .gsub("\tE6100000010C", "\t\\\\xE6100000010C") # For geospatial data .gsub(/\r\n/, "\n") # Make everything compatible with Windows -- change \r\n into just \n end end if is_needed puts "Processing #{csv_file}" f.close w = File.open(csv_file + ".xyz", "w") w.write(output) w.close File.delete(csv_file) File.rename(csv_file + ".xyz", csv_file) end # Here's a list of files that get snagged here: # Address.csv # AWBuildVersion.csv # CreditCard.csv # Culture.csv # Currency.csv # Department.csv # EmployeeDepartmentHistory.csv # EmployeePayHistory.csv # Product.csv # ProductCostHistory.csv # ProductModelIllustration.csv # ProductReview.csv # SalesOrderDetail.csv # SalesTerritory.csv # Shift.csv # ShipMethod.csv # ShoppingCartItem.csv # SpecialOffer.csv # Vendor.csv # WorkOrder.csv rescue Encoding::InvalidByteSequenceError f.close end end