#!/usr/bin/ruby
# frozen_string_literal: true

#
# Original version August 18, 2010
# by Don Cruse
# http://github.com/doncruse/citation-form
#
# If you want to know more about my projects, I write about
# them on my law blog (http://www.scotxblog.com).
#
# This program is fully open source & free under the MIT license
# License: http://www.opensource.org/licenses/mit-license.php
#
# You are free to modify this as you wish.

# This is what is known as a "hash." Each word on the left
# is mapped to one abbreviation. I chose lowercase to make
# the matching process a little simpler.
WORDS = {
  "academy" => "Acad.",
  "academies" => "Acads.",
  "administrative" => "Admin.",
  "administration" => "Admin.",
  "administrator" => "Adm'r",
  "administrators" => "Adm'rs",
  "administratrix" => "Adm'x",
  "advertising" => "Adver.",
  "agriculture" => "Agric.",
  "agricultural" => "Agric.",
  "america" => "Am.",
  "american" => "Am.",
  "and" => "&",
  "associate" => "Assoc.",
  "associates" => "Assocs.",
  "association" => "Assoc.",
  "associations" => "Assocs.",
  "atlantic" => "Atl.",
  "authority" => "Auth.",
  "authorities" => "Auths.",
  "automobile" => "Auto.",
  "automotive" => "Auto.",
  "automobiles" => "Autos.",
  "avenue" => "Ave.",
  "avenues" => "Aves.",
  "board" => "Bd.",
  "boards" => "Bds.",
  "broadcast" => "Broad.",
  "brotherhood" => "Bhd.",
  "brotherhoods" => "Bhds.",
  "broadcasting" => "Broad.",
  "bankruptcy" => "Bankr.",
  "brothers" => "Bros.",
  "building" => "Bldg.",
  "buildings" => "Bldgs.",
  "business" => "Bus.",
  "businesses" => "Bus.",
  "casualty" => "Cas.",
  "center" => "Ctr.",
  "centre" => "Ctr.",
  "centers" => "Ctrs.",
  "centres" => "Ctrs.",
  "central" => "Cent.",
  "chemical" => "Chem.",
  "chemicals" => "Chems.",
  "coalition" => "Coal.",
  "coalitions" => "Coals.",
  "college" => "Coll.",
  "colleges" => "Colls.",
  "commission" => "Comm'n",
  "commissions" => "Comm'ns",
  "commissioner" => "Comm'r",
  "commissioners" => "Comm'rs",
  "committee" => "Comm.",
  "committees" => "Comms.",
  "communication" => "Commc'n",
  "communications" => "Commc'ns",
  "community" => "Cmty.",
  "communities" => "Cmtys.",
  "company" => "Co.",
  "companies" => "Cos.",
  "compensation" => "Comp.",
  "condominium" => "Condo.",
  "condominiums" => "Condos.",
  "congress" => "Cong.",
  "congressional" => "Cong.",
  "congresses" => "Congs.",
  "consolidated" => "Consol.",
  "consolidateds" => "Consols.",
  "construction" => "Constr.",
  "constructions" => "Constrs.",
  "continental" => "Cont'l",
  "cooperative" => "Coop.",
  "cooperatives" => "Coops.",
  "corporation" => "Corp.",
  "corporations" => "Corps.",
  "correction" => "Corr.",
  "corrections" => "Corr.",
  "correctional" => "Corr.",
  "defense" => "Def.",
  "department" => "Dep't",
  "detention" => "Det.",
  "development" => "Dev.",
  "developments" => "Devs.",
  "director" => "Dir.",
  "directors" => "Dirs.",
  "discount" => "Disc.",
  "discounts" => "Discs.",
  "distributor" => "Distrib.",
  "distributors" => "Distribs.",
  "distributing" => "Distrib.",
  "district" => "Dist.",
  "districts" => "Dists.",
  "division" => "Div.",
  "divisions" => "Divs.",
  "east" => "E.",
  "eastern" => "E.",
  "economic" => "Econ.",
  "economics" => "Econ.",
  "economical" => "Econ.",
  "economy" => "Econ.",
  "economies" => "Econs.",
  "education" => "Educ.",
  "educational" => "Educ.",
  "electric" => "Elec.",
  "electrical" => "Elec.",
  "electricity" => "Elec.",
  "electronic" => "Elec.",
  "electronics" => "Elecs.",
  "engineer" => "Eng'r",
  "engineers" => "Eng'rs",
  "engineering" => "Eng'g",
  "enterprise" => "Enter.",
  "enterprises" => "Enters.",
  "entertainment" => "Entm't",
  "entertainments" => "Entm'ts",
  "environment" => "Env't",
  "environments" => "Env'ts",
  "environmental" => "Envtl.",
  "equality" => "Equal.",
  "equipment" => "Equip.",
  "equipments" => "Equips.",
  "examiner" => "Exam'r",
  "examiners" => "Exam'rs",
  "exchange" => "Exch.",
  "exchanges" => "Exchs.",
  "executor" => "Ex'r",
  "executrix" => "Ex'x",
  "executors" => "Ex'rs",
  "executrixes" => "Ex'xs",
  "export" => "Exp.",
  "exports" => "Exps.",
  "federal" => "Fed.",
  "federation" => "Fed'n",
  "federations" => "Fed'ns",
  "fidelity" => "Fid.",
  "finance" => "Fin.",
  "financial" => "Fin.",
  "financing" => "Fin.",
  "foundation" => "Found.",
  "foundations" => "Founds.",
  "general" => "Gen.",
  "government" => "Gov't",
  "governments" => "Gov'ts",
  "guaranty" => "Guar.",
  "guaranties" => "Guars.",
  "hospital" => "Hosp.",
  "hospitals" => "Hosps.",
  "housing" => "Hous.",
  "import" => "Imp.",
  "imports" => "Imps.",
  "importer" => "Imp.",
  "importers" => "Imps.",
  "importation" => "Imp.",
  "importations" => "Imps.",
  "incorporated" => "Inc.",
  "indemnity" => "Indem.",
  "indemnities" => "Indems.",
  "independent" => "Indep.",
  "independents" => "Indeps.",
  "industry" => "Indus.",
  "industries" => "Indus.",
  "industrial" => "Indus.",
  "information" => "Info.",
  "institute" => "Inst.",
  "institutes" => "Insts.",
  "institution" => "Inst.",
  "institutions" => "Insts.",
  "insurance" => "Ins.",
  "international" => "Int'l",
  "investment" => "Inv.",
  "investments" => "Invs.",
  "laboratory" => "Lab.",
  "laboratories" => "Labs.",
  "liability" => "Liab.",
  "liabilities" => "Liabs.",
  "limited" => "Ltd.",
  "limiteds" => "Ltds.",
  "litigation" => "Litig.",
  "litigations" => "Litigs.",
  "machine" => "Mach.",
  "machines" => "Machs.",
  "machinery" => "Mach.",
  "machineries" => "Machs.",
  "maintenance" => "Maint.",
  "management" => "Mgmt.",
  "managements" => "Mgmts.",
  "manufacturer" => "Mfr.",
  "manufacturers" => "Mfrs.",
  "manufacturing" => "Mfg.",
  "maritime" => "Mar.",
  "market" => "Mkt.",
  "markets" => "Mkts.",
  "marketing" => "Mktg.",
  "mechanic" => "Mech.",
  "mechanics" => "Mechs.",
  "mechanical" => "Mech.",
  "medical" => "Med.",
  "medicine" => "Med.",
  "memorial" => "Mem'l",
  "memorials" => "Mem'ls",
  "merchant" => "Merch.",
  "merchandise" => "Merch.",
  "merchandising" => "Merch.",
  "metropolitan" => "Metro.",
  "metropolitans" => "Metros.",
  "municipal" => "Mun.",
  "mutual" => "Mut.",
  "mutuals" => "Muts.",
  "national" => "Nat'l",
  "nationals" => "Nat'ls",
  "north" => "N.",
  "northern" => "N.",
  "northeast" => "Ne.",
  "northeastern" => "Ne.",
  "northwest" => "Nw.",
  "northwestern" => "Nw.",
  "number" => "No.",
  "organization" => "Org.",
  "organizing" => "Org.",
  "organizations" => "Orgs.",
  "pacific" => "Pac.",
  "partnership" => "P'ship",
  "partnerships" => "P'ships",
  "person" => "Pers.",
  "personal" => "Pers.",
  "personnel" => "Pers.",
  "pharmaceutics" => "Pharm.",
  "pharmaceutical" => "Pharm.",
  "pharmaceuticals" => "Pharms.",
  "preserve" => "Pres.",
  "preservation" => "Pres.",
  "probation" => "Prob.",
  "product" => "Prod.",
  "products" => "Prods.",
  "production" => "Prod.",
  "productions" => "Prods.",
  "professional" => "Prof'l",
  "professionals" => "Prof'ls",
  "property" => "Prop.",
  "properties" => "Props.",
  "protection" => "Prot.",
  "public" => "Pub.",
  "publication" => "Publ'n",
  "publishing" => "Publ'g",
  "publications" => "Publ'ns",
  "railroad" => "R.R.",
  "railway" => "Ry.",
  "refining" => "Ref.",
  "railways" => "Rys.",
  "regional" => "Reg'l",
  "rehabilitation" => "Rehab.",
  "reproductive" => "Reprod.",
  "reproduction" => "Reprod.",
  "resource" => "Res.",
  "resources" => "Res.",
  "restaurant" => "Rest.",
  "restaurants" => "Rests.",
  "retirement" => "Ret.",
  "road" => "Rd.",
  "roads" => "Rds.",
  "saving" => "Sav.",
  "savings" => "Savs.",
  "school" => "Sch.",
  "schools" => "Sch.",
  "science" => "Sci.",
  "sciences" => "Scis.",
  "secretary" => "Sec'y",
  "secretaries" => "Sec'ys",
  "security" => "Sec.",
  "securities" => "Sec.",
  "service" => "Serv.",
  "services" => "Servs.",
  "shareholder" => "S'holder",
  "shareholders" => "S'holders",
  "social" => "Soc.",
  "society" => "Soc'y",
  "south" => "S.",
  "southern" => "S.",
  "southeast" => "Se.",
  "southeastern" => "Se.",
  "southwest" => "Sw.",
  "southwestern" => "Sw.",
  "steamship" => "S.S.",
  "steamships" => "S.S.",
  "street" => "St.",
  "subcommittee" => "Subcomm.",
  "subcommittees" => "Subcomms.",
  "surety" => "Sur.",
  "sureties" => "Surs.",
  "system" => "Sys.",
  "systems" => "Sys.",
  "technology" => "Tech.",
  "technologies" => "Techs.",
  "telecommunication" => "Telecomm.",
  "telecommunications" => "Telecomms.",
  "telephone" => "Tele.",
  "telegraph" => "Tele.",
  "telephones" => "Teles.",
  "telegraphs" => "Teles.",
  "temporary" => "Temp.",
  "temporaries" => "Temps.",
  "township" => "Twp.",
  "townships" => "Twps.",
  "transcontinental" => "Transcon.",
  "transport" => "Transp.",
  "transportation" => "Transp.",
  "transports" => "Transps.",
  "transportations" => "Transps.",
  "trustee" => "Tr.",
  "trustees" => "Trs.",
  "turnpike" => "Tpk.",
  "turnpikes" => "Tpk.",
  "uniform" => "Unif.",
  "university" => "Univ.",
  "universities" => "Univs.",
  "utility" => "Util.",
  "utilities" => "Utils.",
  "village" => "Vill.",
  "villages" => "Vills.",
  "west" => "W.",
  "western" => "W."
}.freeze

# These abbreviations check against the 18th edition of the Bluebook.
# No claim of ownership is made about any abbreviation, because that
# would be silly. I made some personal judgments about pluralization.

# Geographic names, keyed in lowercase. Keys may span several words
# separated by single spaces; the values are stored already correctly
# capitalized and are emitted verbatim.
PLACES = {
  'alabama' => 'Ala.',
  'american samoa' => 'Am. Sam.',
  'arizona' => 'Ariz.',
  'arkansas' => 'Ark.',
  'baltimore' => 'Balt.',
  'california' => 'Cal.',
  'chicago' => 'Chi.',
  'colorado' => 'Colo.',
  'connecticut' => 'Conn.',
  'delaware' => 'Del.',
  'district of columbia' => 'D.C.',
  'florida' => 'Fla.',
  'georgia' => "Ga.",
  'hawaii' => 'Haw.',
  'hawai\'i' => 'Haw.',
  'illinois' => 'Ill.',
  'indiana' => 'Ind.',
  'kansas' => 'Kan.',
  'kentucky' => "Ky.",
  'los angeles' => "L.A.",
  'louisiana' => "La.",
  'maine' => 'Me.',
  'maryland' => 'Md.',
  'massachusetts' => 'Mass.',
  'michigan' => "Mich.",
  'minnesota' => 'Minn.',
  'mississippi' => 'Miss.',
  'missouri' => 'Mo.',
  'montana' => 'Mont.',
  'nebraska' => 'Neb.',
  'nevada' => 'Nev.',
  'new hampshire' => 'N.H.',
  'new jersey' => 'N.J.',
  'new mexico' => 'N.M.',
  'new york' => 'N.Y.',
  'north carolina' => 'N.C.',
  'north dakota' => 'N.D.',
  'northern mariana islands' => 'N. Mar. I.',
  'oklahoma' => 'Okla.',
  'oregon' => 'Or.',
  'pennsylvania' => 'Pa.',
  'philadelphia' => 'Phila.',
  'puerto rico' => 'P.R.',
  'rhode island' => 'R.I.',
  'south carolina' => 'S.C.',
  'south dakota' => 'S.D.',
  'san francisco' => 'S.F.',
  'tennessee' => 'Tenn.',
  'texas' => 'Tex.',
  'vermont' => 'Vt.',
  'virgin islands' => 'V.I.',
  'virginia' => 'Va.',
  'washington' => 'Wash.',
  'west virginia' => 'W. Va.',
  'wisconsin' => 'Wis.',
  'wyoming' => 'Wyo.'
}.freeze

# Largest number of words in any PLACES key; bounds the phrase look-ahead.
MAX_PLACE_WORDS = PLACES.keys.map { |name| name.count(' ') + 1 }.max

###########################################################
# What follows is the substitution program.
#
# It takes the selected text, splits it into words,
# checks to see if the word appears in the table above
# (without regard to capitalization), and if so does
# the substitution. Then it reassembles the text
# with spaces.
#
# I have added some logic to understand punctuation.
# It can understand opening or closing parentheses that
# touch a word, and it can also understand a trailing comma
# or semicolon. Any other punctuation will cause the word
# not to be a match.

# Multi-word place names from PLACES (such as "New York") are matched
# as a phrase, longest phrase first, before single words are tried.

# KNOWN ISSUE: This does not yet know how to process
# WORDS terms that might be split into two words, such as "rail road"
# or "south east." I may need some guidance about whether
# those present citation edge cases.

# KNOWN ISSUE: This produces 'dumb' apostrophes, which require
# some tweaking in your word processor.

# List of valid punctuation marks, by ASCII codes
LEADING_PUNCTUATION = [40].freeze          # "("
TRAILING_PUNCTUATION = [41, 44, 59].freeze # ")", ",", ";"

# Reports whether +char+ is one of the punctuation marks listed in +codes+.
#
# The comparison is made on one-character strings so that it works whether
# indexing a String yields a String (Ruby 1.9+) or an Integer (Ruby 1.8).
#
# @param char [String, nil] a one-character string, "" or nil
# @param codes [Array<Integer>] ASCII codes of the accepted marks
# @return [Boolean]
def punctuation_mark?(char, codes)
  codes.any? { |code| code.chr == char }
end

# Separates a whitespace-delimited token into leading punctuation, the bare
# word, and trailing punctuation. The argument is not modified.
#
# At most one leading mark is detached, and only when something follows it;
# any run of trailing marks is detached and kept in its original order.
#
# @param token [String]
# @return [Array(String, String, String)] leading, core, trailing
def split_punctuation(token)
  core = token.dup
  leading = ''
  trailing = ''
  if core.size > 1 && punctuation_mark?(core[0, 1], LEADING_PUNCTUATION)
    leading = core.slice!(0, 1)
  end
  # core[-1, 1] is nil once core is empty, which ends the loop.
  while punctuation_mark?(core[-1, 1], TRAILING_PUNCTUATION)
    # Prepend so that "word)," keeps its marks as ")," rather than ",)".
    trailing = core.slice!(-1, 1) + trailing
  end
  [leading, core, trailing]
end

# Abbreviates one token, looking the bare word up in WORDS and then PLACES
# without regard to capitalization, and restoring any detached punctuation.
#
# @param token [String]
# @return [String] the abbreviated token, or the token unchanged if no match
def abbreviate_token(token)
  leading, core, trailing = split_punctuation(token)
  key = core.downcase
  "#{leading}#{WORDS[key] || PLACES[key] || core}#{trailing}"
end

# Reports whether punctuation appears only on the outer edges of a phrase:
# leading marks only on the first part, trailing marks only on the last.
#
# @param parts [Array<Array(String, String, String)>] split_punctuation results
# @return [Boolean]
def edge_punctuation_only?(parts)
  last = parts.size - 1
  parts.each_with_index.all? do |(leading, _core, trailing), position|
    (position.zero? || leading.empty?) && (position == last || trailing.empty?)
  end
end

# Tries to match a multi-word PLACES key beginning at tokens[start],
# preferring the longest phrase.
#
# @param tokens [Array<String>]
# @param start [Integer] index of the first token of the candidate phrase
# @return [Array(String, Integer), nil] the abbreviation (with the phrase's
#   outer punctuation restored) and the number of tokens consumed, or nil
def match_place_phrase(tokens, start)
  longest = [MAX_PLACE_WORDS, tokens.size - start].min
  longest.downto(2) do |count|
    parts = tokens[start, count].map { |token| split_punctuation(token) }
    next unless edge_punctuation_only?(parts)

    key = parts.map { |_leading, core, _trailing| core.downcase }.join(' ')
    place = PLACES[key]
    return ["#{parts.first[0]}#{place}#{parts.last[2]}", count] if place
  end
  nil
end

# Abbreviates every recognized word or place name in a case name.
#
# The text is stripped, split on single whitespace characters and rejoined
# with single spaces, so runs of spaces inside the text are preserved.
#
# @param casename [String] the text to abbreviate; not modified
# @return [String] the abbreviated case name
def abbreviate_case_name(casename)
  tokens = casename.strip.split(/\s/)
  pieces = []
  index = 0
  # A manual index is needed because a phrase match consumes several tokens.
  while index < tokens.size
    phrase, consumed = match_place_phrase(tokens, index)
    if phrase
      pieces << phrase
      index += consumed
    else
      pieces << abbreviate_token(tokens[index])
      index += 1
    end
  end
  pieces.join(' ')
end

# Read the selected text, abbreviate it, and write the result back out
# so that it replaces the selected text in the document.
casename = STDIN.read
puts abbreviate_case_name(casename)