# Quatitative Study Replication
## Imports and Data

In [1]:
import warnings
warnings.filterwarnings("ignore")

In [2]:
import pandas as pd
dat = pd.read_csv('StudyDataset.csv', encoding='windows-1252', index_col=0)

In [3]:
df = pd.read_csv('Quantitative Study Replication/Non-State Actors Dataset/nsa_v3.4_21November2013.asc', sep='\t', encoding='windows-1252')

In [4]:
df_3 = pd.read_csv('StudyDataset2.csv', encoding='windows-1252', index_col=0)

## Rows with more than one Rebel Group
The DCJ dataset contains rows that list more than one rebel group in the 'Side B' column. In order to deal with this, first, sample weights were introduced. These rows were then split into a row for each group, and weighted such that the total weight of the newly created rows for each original rows sums to one. 

In other words, if a row lists 2 groups, it will be split into 2 rows, each with one of the two groups and otherwise identical, weighted at 0.5 each.

In [5]:
dat

Unnamed: 0,acdid,year,gwno,location,sidea,sideb,incomp,territory,startdate,epstartdate,...,exile_erank,exile_sender,exile_scope,exile_scount,exile_implement,exile_rDCJ,exile_peaceagr,exile_start,exile_end,exile_perm
1,1,1946,145.0,Bolivia,Bolivia,Popular Revolutionary Movement,2,,1946-06-30,1946-06-30,...,,,,,,,,,,
2,1,1952,145.0,Bolivia,Bolivia,MNR,2,,1946-06-30,1952-04-09,...,,,,,,,,,,
3,1,1967,145.0,Bolivia,Bolivia,ELN,2,,1946-06-30,1967-03-31,...,,,,,,,,,,
4,2,1946,811.0,Cambodia (Kampuchea),France,Khmer Issarak,1,Cambodia,1946-08-31,1946-08-31,...,,,,,,,,,,
5,2,1947,811.0,Cambodia (Kampuchea),France,Khmer Issarak,1,Cambodia,1946-08-31,1946-08-31,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3406,271,2011,620.0,Libya,Libya,"NTC, Forces of Muammar Gaddafi",2,,2011-01-28,2011-03-04,...,,,,,,,,,,
3407,271,2011,620.0,Libya,Libya,"NTC, Forces of Muammar Gaddafi",2,,2011-01-28,2011-03-04,...,,,,,,,,,,
3408,271,2011,620.0,Libya,Libya,"NTC, Forces of Muammar Gaddafi",2,,2011-01-28,2011-03-04,...,,,,,,,,,,
3409,271,2011,620.0,Libya,Libya,"NTC, Forces of Muammar Gaddafi",2,,2011-01-28,2011-03-04,...,,,,,,,,,,


In [6]:
dat['model_weight'] = 1
indexes_to_delete = []
for i in dat.index:
 if len(dat.loc[i]['sideb'].split(',')) > 1:
 for actor in dat.loc[i]['sideb'].split(','):
 new_row = dat.loc[i]
 new_row['model_weight'] /= len(dat.loc[i]['sideb'].split(','))
 new_row['sideb'] = actor.strip()
 dat.loc[len(dat) + 1] = new_row
 indexes_to_delete.append(i)
dat
 

Unnamed: 0,acdid,year,gwno,location,sidea,sideb,incomp,territory,startdate,epstartdate,...,exile_sender,exile_scope,exile_scount,exile_implement,exile_rDCJ,exile_peaceagr,exile_start,exile_end,exile_perm,model_weight
1,1,1946,145.0,Bolivia,Bolivia,Popular Revolutionary Movement,2,,1946-06-30,1946-06-30,...,,,,,,,,,,1.0
2,1,1952,145.0,Bolivia,Bolivia,MNR,2,,1946-06-30,1952-04-09,...,,,,,,,,,,1.0
3,1,1967,145.0,Bolivia,Bolivia,ELN,2,,1946-06-30,1967-03-31,...,,,,,,,,,,1.0
4,2,1946,811.0,Cambodia (Kampuchea),France,Khmer Issarak,1,Cambodia,1946-08-31,1946-08-31,...,,,,,,,,,,1.0
5,2,1947,811.0,Cambodia (Kampuchea),France,Khmer Issarak,1,Cambodia,1946-08-31,1946-08-31,...,,,,,,,,,,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
5861,271,2011,620.0,Libya,Libya,Forces of Muammar Gaddafi,2,,2011-01-28,2011-03-04,...,,,,,,,,,,0.5
5862,271,2011,620.0,Libya,Libya,NTC,2,,2011-01-28,2011-03-04,...,,,,,,,,,,0.5
5863,271,2011,620.0,Libya,Libya,Forces of Muammar Gaddafi,2,,2011-01-28,2011-03-04,...,,,,,,,,,,0.5
5864,271,2011,620.0,Libya,Libya,NTC,2,,2011-01-28,2011-03-04,...,,,,,,,,,,0.5


In [7]:
dat.drop(index=indexes_to_delete, inplace=True)

## Different Spellings and Acronyms
Some of the spellings and acronyms used differ between the DCJ and NSA datasets. This step matches up the spellings so the datasets can be joined.

In [8]:
dict = {
'Peoples Liberation Army': 'PLA',
'Republic of Kurdistan/KDPI': 'KDPI',
'Indonesian Peoples Army': "Indonesian People's Army",
'Communist Party of the Philippines': 'CPP',
'Military Faction (forces of Honasan, Abenina & Zumel)': 'CPP, Military Faction (forces of Honasan, Abenina & Zumel)',
'LTS[p]A': 'LTS(p)A',
'Viet Nam Doc Dong Min Hoi': 'Viet minh',
'Opposition coalition [Febreristas, Liberals and Communists]': 'Opposition coalition (Febreristas, Liberals and Communists)',
'Military Faction (forces of General Rodriguez)': 'Military faction (forces of Andres Rodriguez)',
'Military faction (forces of General Alfredo Stroessner)': 'Military faction (forces of Alfredo Stroessner)',
'PVO - White Band faction': 'PVO - "White Band" faction',
"Arakan People's Liberation Party": 'APLP',
'Communist Party of Arakan': 'CPA',
'Rohingya Solidarity Organisation': 'RSO',
'Arakan Rohingya Islamic Front': 'ARIF',
'Naxalites/PWG': 'PWG',
'Naxalites/CPI [-Marxist]': 'CPI-ML',
'CPI–Maoist': 'CPI-Maoist',
'Military Faction [Navy]': 'Military faction (Navy)',
'Military Faction - 26th of July Movement': 'M-26-7',
'National Revolutionary Council': 'Cuban Revolutionary Council',
'Darul Islam Movement': 'Darul Islam',
'Military faction (forces of Eduardo A. Lonardi Doucet)': 'Military faction (forces of Samuel Toranzo Calderón), Military faction (forces of Eduardo A. Lonardi Doucet)',
'Supreme Council for the Islamic Revolution in Iraq (SCIRI)': 'SCIRI',
'ISI/Jama\'at Al-Tawhid wa Al-Jihad': 'ISI',
'RJF/Al-Jaysh al-Islami fi Iraq': 'RJF',
'Independent Nasserite Movement /Mourabitoun militia': 'Independent Nasserite Movement /Mourabitoun militia',
'Lebanese National Movement': 'LNM',
'Shan State Army - South (SSA-S)': 'SSA',
'Independent Mining State of South Kasai': 'Independent Mining State of South Kasai',
'Military faction (forces of Amsha Desta and Merid Negusie)': 'EPRDF, Military faction (forces of Amsha Desta and Merid Negusie)',
'CPN-M/UPF': 'CPN-M',
'KDP/DPK': 'KDP',
'North Kalimantan Liberation Army': 'North Kalimantan Liberation Army',
'Military faction (forces of Hugo Chávez)': 'Military faction (forces of Hugo Chávez)',
"Military faction (forces loyal to Léon M'Ba)": "Military faction (forces loyal to Léon M'Ba)",
'Military faction (forces loyal to Gervais Nyangoma)': 'Military faction (forces loyal to Gervais Nyangoma)',
'First Liberation Army': 'First Liberation Army',
'Second Liberation Army (Frolinat)': 'Second Liberation Army',
'Military faction (forces of Maldoum Bada Abbas)': 'MDD, Military faction (forces of Maldoum Bada Abbas)',
'Military faction (Constitutionalists)': 'Military faction (Constitutionalists)',
'Military faction (forces of Jerry John Rawlings)': 'Military faction (forces of Jerry John Rawlings)',
'Military faction (forces of Ekow Dennis and Edward Adjei-Ampofo) ': 'Military faction (forces of Ekow Dennis and Edward Adjei-Ampofo)',
'Military faction (forces of Patrick Nzeogwu)': 'Military faction (forces of Patrick Nzeogwu)',
'Boko Haram': "Jama'atu Ahlis Sunna Lidda'awati wal-Jihad",
'Military faction (forces loyal to Nureddin Atassi and Youssef Zeayen)': 'Military faction (forces loyal to Nureddin Atassi and Youssef Zeayen)',
'Khmer Rouge/PDK': 'KR',
'FUNCINPEC/ANS': 'FUNCINPEC',
'MIM/Mindanao Independence Movement': 'MIM',
'MNLF – NM': 'MNLF - NM',
'MNLF – HM': 'MNLF - HM',
'SLM/A': 'SLM/A',
'SLM/A – MM': 'SLM/A - MM',
'SPLM/A-North': 'SPLM/A-North',
'Military faction (forces of Mohamed Madbouh)': 'Military faction (forces of Mohamed Madbouh)',
'Mukti Bahini: Liberation Force': 'Mukti Bahini',
'Military faction (forces of Idi Amin)': 'Military faction (forces of Idi Amin)',
'Military faction (forces of Charles Arube)': 'Military faction (forces of Charles Arube)',
"Lord's Army": "Lord's Army",
'PIRA/IRA': 'PIRA',
'Military faction (forces of Benjamin Mejia)': 'Military faction (forces of Benjamin Mejia)',
'MLN or Tupamaros': 'MLN/Tupamaros',
'Military faction (forces of Augusto Pinochet, Toribio Merino and Leigh Guzman)': 'Military faction (forces of Augusto Pinochet, Toribio Merino and Leigh Guzman)',
'JSS/SB/Shanti Bahini': 'JSS/SB',
'BLA/Baluchistan Liberation Army': 'BLA',
'BRA/Baluchistan Republican Army': 'BRA',
'Hezb-i-Islami': 'Hizb-i Islami-yi Afghanistan',
'Hezb-i-Wahdat': 'Hizb-i Wahdat',
'Jamiat-i-Islami': "Jam'iyyat-i Islami-yi Afghanistan",
'Junbish-i Milli-yi Islami': 'Junbish-i Milli-yi Islami',
'Hizb-i Demokratik-i Khalq-i Afghanistan': 'PDPA',
'Harakat-i Inqilab-i Islami-yi Afghanistan': 'Harakat-i Inqilab-i Islami-yi Afghanistan',
'Mahaz-i Milli-yi Islami-yi Afghanistan': 'Mahaz-i Milli-yi Afghanistan',
'Jabha-yi Nijat-i Milli-yi Afghanistan': 'Jabha-yi Nijat-i Milli-yi Afghanistan',
'Ittihad-i Islami Bara-yi Azadi-yi Afghanistan': 'Ittihad-i Is',
'Harakat-i Islami-yi Afghanistan': 'Harakat-i Islami-yi Afghanistan',
'Hizb-i Islami-yi Afghanistan - Khalis faction': 'Hizb-i Islami-yi Afghanistan - Khalis faction',
'FDN/Contras': 'Contras/FDN',
'USC Faction': 'USC/SNA',
'ARS/UIC': 'ARS/UIC',
'Mujahideen e Khalq': 'MEK',
'Military faction (forces of Samuel Doe)': 'Military faction (forces of Samuel Doe)',
'Resistance Armee Tunisienne': 'Résistance Armée Tunisienne',
'Military faction (forces of Hezekiah Ochuka)': 'Military faction (forces of Hezekiah Ochuka)',
'PKK/Kadek': 'PKK',
'Yemenite Socialist Party - Abdul Fattah Ismail faction': 'Yemenite Socialist Party - Abdul Fattah Ismail faction',
'Military faction (forces of Moisés Giroldi)': 'Military faction (forces of Moisés Giroldi)',
'Military faction (forces of Himmler Rebu and Guy Francois)': 'Military faction (forces of Himmler Rebu and Guy Francois)',
'Military faction (forces of Raol Cédras)': 'Military faction (forces of Raol Cédras)',
'OP Lavalas (Chimères)': 'OP Lavalas (Chimères)',
'Government of Armenia and ANM': 'Republic of Armenia',
'Azerbaijani Popular Front': 'APF',
'FRUD – AD': 'FRUD - AD',
'Croatian irregulars': 'Croatian irregulars',
'Exile and Redemption': "Takfir wa'l Hijra",
'FLEC–FAC': 'FLEC-FAC',
'Republic of Nagorno-Karabakh': 'Republic of Nagorno-Karabakh',
'Serbian Republic of Bosnia and Herzegovina': 'Serbian Republic of Bosnia-Herzegovina',
'Serbian Republic of Krajina': 'Serbian Republic of Krajina',
'al-Gamaa al-Islamiyya': "al-Gama'a al-Islamiyya",
'Republic of Abkhazia': 'Republic of Abkhazia',
'Republic of South Ossetia': 'Republic of South Ossetia',
'Dniestr Republic': 'PMR',
'Movement for Peace in Tajikistan': 'Movement for Peace in Tajikistan',
'Husseinov Military Faction': 'Military faction (forces of Suret Husseinov)',
'OPON forces': 'OPON forces',
'Autonomous Province of Western Bosnia': 'Autonomous Province of Western Bosnia',
'Croatian Republic of Bosnia and Herzegovina': 'Croatian Republic of Bosnia-Herzegovina',
'Republic of Chechnya': 'Chechen Republic of Ichkeria',
'Military Junta for the Consolidation of Democracy, Peace and Justice': 'Military Junta for the Consolidation of Democracy, Peace and Justice',
'National Liberation Army (UCK)': 'UCK',
'al-Qaida [The Base]': 'al-Qaida',
'Forces of the Caucasus Emirate': 'Forces of the Caucasus Emirate',
'NDFB – RD': 'NDFB - RD',
'Republic of South Sudan': 'Republic of South Sudan',
'Forces of Muammar Gaddafi': 'Forces of Muammar Gaddafi'
}

In [9]:
df = df.groupby(['ucdpid', 'side_b']).first().reset_index()

In [10]:
df.replace(dict, inplace=True)

## Joining the Data
This cell merges the three datasets into one complete dataset

In [12]:
data = pd.merge(left=dat, right=df, left_on=['acdid','sideb'], right_on=['ucdpid', 'side_b'], suffixes=('_DCJ', '_NSA'), how='inner')
data = data.join(df_3.set_index(['acdid', 'styear', 'endyear', 'year']), on=['acdid', 'styear', 'endyear', 'year'], rsuffix='_DCJ2', how='left')

## Data Cleaning
There are a couple of data cleaning steps to get the data in the form that's described in the study:

### is_leftist_group
The original paper mentions 36 leftist groups. The full list of involved groups was given to Claude Sonnet 3.5, and it was asked to identify leftist groups. The list was then confirmed using basic google searches. Data was labeled 1/True if a group is a leftist group, or 0 if not.

In [13]:
data['is_leftist_group'] = pd.Series()
for i in data.index:
 if data.loc[i]['sideb'] in ['CPP', 'CPI', 'CPI-ML', 'PWG', 'CPI-Maoist', 'CPT', 'CPM', 'CPN-M', 'FARC', 'Sendero Luminoso', 'JVP', 'MEK', 'MCC', 'Viet minh', 'Pathet Lao', 'TPLF', 'EPRDF', 'Frelimo', 'MPLA', 'SWAPO', 'ZAPU', 'ZANU', 'MLN/Tupamaros', 'FSLN', 'PDPA', 'UNLF', 'PFLP', 'PFLP-GC', 'ELN', 'PLA', 'DSE', 'Huk', 'M-26-7', 'EPL', 'MIR', 'EPRP']:
 data['is_leftist_group'][i] = 1
 else:
 data['is_leftist_group'][i] = 0

### Trimming the Fat
'Dataset' contains just the necessary elements from 'data', and includes all the information needed for this analysis.

In [14]:
dataset = data[['acdid', 'location', 'sidea', 'sideb', 'model_weight', 'mobcap', 'fightcap', 'intens', 'polity', 'is_leftist_group', 'trial', 'truth', 'rep', 'amnesty', 'purge', 'exile', 'incomp', 'terrcont', 'year']] 

### year_of_conflict
This variable represents how many years a conflict has been ongoing, as described in the original study. It is increased by 1 for every consecutive year that the same conflict appears in the dataset.

In [15]:
dataset['year_of_conflict'] = pd.Series()
dataset['year_of_conflict'][0] = 1
i = 1
for idx in dataset.index[1:]:
 if list(dataset.loc[idx][['acdid', 'location', 'sidea', 'sideb']]) == list(dataset.loc[idx-1][['acdid', 'location', 'sidea', 'sideb']]) and dataset.loc[idx, 'year'] == dataset.loc[idx-1]['year'] + 1:
 i+=1
 elif list(dataset.loc[idx][['acdid', 'location', 'sidea', 'sideb']]) == list(dataset.loc[idx-1][['acdid', 'location', 'sidea', 'sideb']]) and dataset.loc[idx, 'year'] == dataset.loc[idx-1]['year']:
 pass
 else: 
 i = 1
 dataset['year_of_conflict'][idx] = i 

In [18]:
dataset

Unnamed: 0,acdid,location,sidea,sideb,model_weight,mobcap,fightcap,intens,polity,is_leftist_group,...,amnesty,purge,exile,incomp,terrcont,year,year_of_conflict,conciliatory,coercive,DCJ_used
0,1,Bolivia,Bolivia,Popular Revolutionary Movement,1.0,moderate,moderate,2,0,0,...,0,0,0,2,no,1946,1,0,0,0
1,1,Bolivia,Bolivia,MNR,1.0,moderate,moderate,1,0,0,...,0,0,0,2,no,1952,1,0,0,0
2,1,Bolivia,Bolivia,ELN,1.0,low,low,1,0,1,...,0,0,0,2,no,1967,1,0,0,0
3,2,Cambodia (Kampuchea),France,Khmer Issarak,1.0,low,low,1,0,0,...,0,0,0,1,yes,1946,1,0,0,0
4,2,Cambodia (Kampuchea),France,Khmer Issarak,1.0,low,low,1,0,0,...,0,0,0,1,yes,1947,2,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4302,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,low,moderate,2,0,0,...,0,0,0,2,yes,2011,1,1,0,1
4303,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,low,moderate,2,0,0,...,0,0,0,2,yes,2011,1,0,1,1
4304,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,low,moderate,2,0,0,...,0,0,0,2,yes,2011,1,0,1,1
4305,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,low,moderate,2,0,0,...,1,0,0,2,yes,2011,1,1,0,1


### Democratic Regime
The authors code their 'democratic regime' variable based on the polity2 scores found in the data, with 6 and up being considered a democratic regime. This process is repeated in the cell below, returning the results as a binary variable in place in the polity column.

In [16]:
dataset['polity'] = dataset['polity'].apply(lambda x : 1 if x >= 6 else 0)

### Target Variables
Creates columns for coercive judicial processes (1/True if trial, exile, or purge was used), conciliatory (1/True if truth, rep, or amnesty was used), and DCJ_used (1/True if coercive or conciliatory is true).

In [17]:
dataset['conciliatory'], dataset['coercive'] = pd.Series(), pd.Series()
dataset['DCJ_used'] = pd.Series()
for idx in dataset.index:
 if dataset.loc[idx, 'truth'] == 1\
 or dataset.loc[idx, 'rep'] == 1\
 or dataset.loc[idx, 'amnesty'] == 1:
 dataset['conciliatory'][idx] = 1
 dataset['coercive'][idx] = 0
 dataset['DCJ_used'][idx] = 1
 elif dataset.loc[idx, 'trial'] == 1\
 or dataset.loc[idx, 'exile'] == 1\
 or dataset.loc[idx, 'purge'] == 1:
 dataset['coercive'][idx] = 1
 dataset['conciliatory'][idx] = 0
 dataset['DCJ_used'][idx] = 1
 else:
 dataset['coercive'][idx] = 0
 dataset['conciliatory'][idx] = 0
 dataset['DCJ_used'][idx] = 0

### mobcap
Mobility Capacity is coded, per the authors, as a binary variable with moderate and above being 1/True.

In [16]:
mobcap_dict = {
 'low': 0,
 'no': 0,
 'moderate': 1,
 'high': 1
}
dataset['mobcap'].replace(mobcap_dict, inplace=True)

### Incompatability
Incomp is rated 1 in the original data if the incompatability is territory, and 2 if it is government. Because we're only interested in territory as the 'True' category, we'll just change the 2s to 0s.

In [17]:
dataset['incomp'].replace({2:0}, inplace=True)

### Territorial Control
This variable just needs to be converted from 'yes'/'no' to 1/0 notation

In [18]:
dataset['terrcont'].replace({'yes': 1, 'no': 0}, inplace=True)

### Cold War
Adds a cold_war column based on whether the year is before or after 1989, as per the study

In [19]:
dataset['cold_war'] = pd.Series()
for idx in dataset.index:
 if dataset['year'][idx] >= 1989: dataset['cold_war'][idx] = 0
 else: dataset['cold_war'][idx] = 1
 

### Intensity
The authors use a binary variable for intensity, which is coded as 1- not intense and 2- intense in the original data. We can just subtract 1 from each row to get to the binary notation we need.

In [20]:
dataset['intens'] -= 1

### Making everything True/False
This cell codes all binary variables as boolean true/false variables to prepare the data for regression.

In [21]:
for i in ['mobcap', 'intens', 'polity', 'is_leftist_group', 'trial', 'truth', 'rep', 'amnesty', 'exile', 'purge', 'conciliatory', 'coercive', 'cold_war', 'incomp', 'terrcont', 'DCJ_used']:
 dataset[i].replace({1: True, 0: False}, inplace=True)

### Getting dummy columns for fightcap
Fighting capacity remains as a 4 category column in the data, so we'll get dummies to one-hot encode the data. Each category will end up in it's own column.

In [22]:
dataset = pd.get_dummies(dataset, columns=['fightcap'])

### Saving the clean dataset

In [26]:
dataset.to_csv('CleanData.csv')

## Modeling

In [24]:
from sklearn.linear_model import LogisticRegression
logreg = LogisticRegression(penalty=None)

### Filling NAs
There are just a couple of NA rows in the data, we'll fill them with False.

In [25]:
dataset['terrcont'].fillna(False, inplace=True)
dataset['mobcap'].fillna(False, inplace=True)

### Model 1: All DCJ Processes

In [27]:
logreg.fit(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset['DCJ_used'], sample_weight=dataset['model_weight'])

In [28]:
coefs = pd.DataFrame(logreg.coef_)
for i in range(11):
 coefs.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1, inplace=True)
coefs.index = ['All DCJ Processes']

### Model 2: Conciliatory Process Use

In [29]:
logreg2 = LogisticRegression(penalty=None)
logreg2.fit(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset['conciliatory'], sample_weight=dataset['model_weight'])
coefs2 = pd.DataFrame(logreg2.coef_)
for i in range(11):
 coefs2.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1, inplace=True)
coefs2.index = ['Conciliatory Processes']

### Model 3: Coercive Process Use

In [30]:
logreg3 = LogisticRegression(penalty=None)
logreg3.fit(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset['coercive'], sample_weight=dataset['model_weight'])
coefs3 = pd.DataFrame(logreg3.coef_)
for i in range(11):
 coefs3.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1, inplace=True)
coefs3.index = ['Coercive Processes']

### Model 4: Conciliatory Processes vs. Coercive Processes
These next two analyses only includes rows where a DCJ was used, and in order to compare conciliatory process use directly to coercive process use among conflict years where a DCJ was used.

In [31]:
dataset2 = dataset[dataset['DCJ_used'] == True]
logreg4 = LogisticRegression(penalty=None)
logreg4.fit(dataset2[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset2['conciliatory'], sample_weight=dataset2['model_weight'])
coefs4 = pd.DataFrame(logreg4.coef_)
for i in range(11):
 coefs4.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1, inplace=True)
coefs4.index = ['Conciliatory Processes vs. Coercive']

### Model 5: Coercive processes vs. Conciliatory processes

In [32]:
logreg5 = LogisticRegression(penalty=None)
logreg5.fit(dataset2[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset2['coercive'], sample_weight=dataset2['model_weight'])
coefs5 = pd.DataFrame(logreg5.coef_)
for i in range(11):
 coefs5.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1, inplace=True)
coefs5.index = ['Coercive Processes vs. Conciliatory']

In [33]:
coefs['constant'] = logreg.intercept_
coefs2['constant'] = logreg2.intercept_
coefs3['constant'] = logreg3.intercept_
coefs4['constant'] = logreg4.intercept_
coefs5['constant'] = logreg5.intercept_

In [34]:
from sklearn.metrics import log_loss
coefs['log_likelihood'] = -log_loss(logreg.predict(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset['DCJ_used'])
coefs2['log_likelihood'] = -log_loss(logreg2.predict(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset['conciliatory'])
coefs3['log_likelihood'] = -log_loss(logreg3.predict(dataset[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset['coercive'])
coefs4['log_likelihood'] = -log_loss(logreg4.predict(dataset2[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset2['conciliatory'])
coefs5['log_likelihood'] = -log_loss(logreg5.predict(dataset2[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset2['coercive'])

In [35]:
results = pd.concat([coefs, coefs2, coefs3, coefs4, coefs5]).T

In [36]:
results

Unnamed: 0,1,2,3,4,5
mobcap,0.661976,0.388605,0.24104,0.240883,-0.240883
fightcap_high,-0.449099,0.45728,-0.829236,0.95888,-0.958879
fightcap_moderate,0.103389,-0.053578,0.222631,-0.200837,0.200837
fightcap_low,0.167188,-0.516068,0.515424,-0.650171,0.650172
intens,0.634839,0.191754,0.336639,-0.070971,0.070971
polity,1.551596,-0.125052,1.304639,-0.743485,0.743485
is_leftist_group,-0.472057,-0.310735,-0.119309,-0.307852,0.307852
incomp,-0.976216,-0.347354,-0.527997,-0.059748,0.059748
terrcont,-0.769757,0.134406,-0.792893,0.599034,-0.599034
year_of_conflict,0.079885,0.024635,0.030991,0.01448,-0.01448


In [38]:
dataset3 = dataset[dataset['conciliatory'] == True]
logreg6 = LogisticRegression(penalty=None)
logreg6.fit(dataset3[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset3['truth'],
 sample_weight=dataset3['model_weight'])
coefs6 = pd.DataFrame(logreg6.coef_)
for i in range(11):
 coefs6.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs6.index = ['Truth Commissions']

In [39]:
logreg7 = LogisticRegression(penalty=None)
logreg7.fit(dataset3[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset3['rep'],
 sample_weight=dataset3['model_weight'])
coefs7 = pd.DataFrame(logreg7.coef_)
for i in range(11):
 coefs7.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs7.index = ['Reparations']

In [40]:
logreg8 = LogisticRegression(penalty=None)
logreg8.fit(dataset3[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset3['amnesty'],
 sample_weight=dataset3['model_weight'])
coefs8 = pd.DataFrame(logreg8.coef_)
for i in range(11):
 coefs8.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs8.index = ['Amnesty']

In [41]:
dataset4 = dataset[dataset['coercive'] == True]
logreg9 = LogisticRegression(penalty=None)
logreg9.fit(dataset4[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset4['trial'],
 sample_weight=dataset4['model_weight'])
coefs9 = pd.DataFrame(logreg9.coef_)
for i in range(11):
 coefs9.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs9.index = ['Trials']

In [42]:
logreg10 = LogisticRegression(penalty=None)
logreg10.fit(dataset4[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset4['exile'],
 sample_weight=dataset4['model_weight'])
coefs10 = pd.DataFrame(logreg10.coef_)
for i in range(11):
 coefs10.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs10.index = ['Exiles']

In [43]:
logreg11 = LogisticRegression(penalty=None)
logreg11.fit(dataset4[
 ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group',
 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']], dataset4['purge'],
 sample_weight=dataset4['model_weight'])
coefs11 = pd.DataFrame(logreg11.coef_)
for i in range(11):
 coefs11.rename({i: ['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity',
 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war'][i]}, axis=1,
 inplace=True)
coefs11.index = ['Purges']

In [44]:
coefs6['constant'] = logreg6.intercept_
coefs7['constant'] = logreg7.intercept_
coefs8['constant'] = logreg8.intercept_
coefs9['constant'] = logreg9.intercept_
coefs10['constant'] = logreg10.intercept_
coefs11['constant'] = logreg11.intercept_

In [47]:
dataset3

Unnamed: 0,acdid,location,sidea,sideb,model_weight,mobcap,intens,polity,is_leftist_group,trial,...,terrcont,year,year_of_conflict,conciliatory,coercive,DCJ_used,cold_war,fightcap_high,fightcap_low,fightcap_moderate
31,6,Iran,Iran,KDPI,1.0,False,True,False,False,False,...,False,1979,1,True,False,True,True,False,True,False
32,6,Iran,Iran,KDPI,1.0,False,True,False,False,False,...,True,1979,1,True,False,True,True,False,True,False
33,6,Iran,Iran,KDPI,1.0,False,True,False,False,False,...,False,1979,1,True,False,True,True,False,True,False
34,6,Iran,Iran,KDPI,1.0,False,True,False,False,False,...,True,1979,1,True,False,True,True,False,True,False
43,6,Iran,Iran,KDPI,1.0,False,False,False,False,False,...,False,1984,6,True,False,True,True,False,True,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4295,271,Libya,Libya,NTC,0.5,True,True,False,False,False,...,True,2011,1,True,False,True,False,False,False,True
4298,271,Libya,Libya,NTC,0.5,True,True,False,False,False,...,True,2011,1,True,False,True,False,False,False,True
4301,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,False,True,False,False,False,...,True,2011,1,True,False,True,False,False,False,True
4302,271,Libya,Libya,Forces of Muammar Gaddafi,0.5,False,True,False,False,False,...,True,2011,1,True,False,True,False,False,False,True


In [61]:
coefs6['log_likelihood'] = -log_loss(y_pred=logreg6.predict(dataset3[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), y_true=dataset3['truth'], labels=[True, False])
coefs7['log_likelihood'] = -log_loss(logreg7.predict(dataset3[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset3['rep'])
coefs8['log_likelihood'] = -log_loss(logreg8.predict(dataset3[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset3['amnesty'])
coefs9['log_likelihood'] = -log_loss(logreg9.predict(dataset4[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset4['trial'], labels=[True, False])
coefs10['log_likelihood'] = -log_loss(logreg10.predict(dataset4[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset4['exile'], labels=[True, False])
coefs11['log_likelihood'] = -log_loss(logreg11.predict(dataset4[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]), dataset4['purge'], labels=[True, False])

In [62]:
results = pd.concat([coefs, coefs2, coefs3, coefs4, coefs5, coefs6, coefs7, coefs8, coefs9, coefs10, coefs11]).T

In [63]:
results

Unnamed: 0,1,2,3,4,5,Truth Commissions,Reparations,Amnesty,Trials,Exiles,Purges
mobcap,0.661976,0.388605,0.24104,0.240883,-0.240883,0.38871,0.115131,-0.258683,0.4906,-0.475132,-0.460048
fightcap_high,-0.449099,0.45728,-0.829236,0.95888,-0.958879,9.692238,-1.852436,0.296176,-1.269715,-4.782811,7.57678
fightcap_moderate,0.103389,-0.053578,0.222631,-0.200837,0.200837,8.170877,-0.838711,0.501842,-0.671203,-0.996337,6.76742
fightcap_low,0.167188,-0.516068,0.515424,-0.650171,0.650172,8.116678,-1.29309,0.898837,-0.252559,-0.554519,5.739381
intens,0.634839,0.191754,0.336639,-0.070971,0.070971,0.289315,0.222522,-0.324709,-0.35191,0.299519,0.431241
polity,1.551596,-0.125052,1.304639,-0.743485,0.743485,0.703958,0.497264,-0.732421,0.362496,-0.065839,-0.673826
is_leftist_group,-0.472057,-0.310735,-0.119309,-0.307852,0.307852,-1.53677,0.749356,-0.194584,0.710211,-1.167977,-0.176958
incomp,-0.976216,-0.347354,-0.527997,-0.059748,0.059748,0.04345,0.155515,-0.156774,-0.032337,0.021043,-0.007348
terrcont,-0.769757,0.134406,-0.792893,0.599034,-0.599034,-0.333139,-0.730111,0.747777,-0.112496,0.529108,-0.322486
year_of_conflict,0.079885,0.024635,0.030991,0.01448,-0.01448,0.018288,0.026035,-0.029845,-0.003418,0.015373,-0.016281


Note: Truth, exile and purge each contain less than 100 "true" rows

In [71]:
import statsmodels.api as sm

In [75]:
# Add a constant term to the features (intercept)
X_with_intercept = sm.add_constant(np.array(dataset3[['mobcap', 'fightcap_high', 'fightcap_moderate', 'fightcap_low', 'intens', 'polity', 'is_leftist_group', 'incomp', 'terrcont', 'year_of_conflict', 'cold_war']]))

# Fit the model using statsmodels
logit_model = sm.Logit(np.array(), X_with_intercept).fit()

# Get the p-values
p_values = logit_model.pvalues

print(p_values)

TypeError: ufunc 'isfinite' not supported for the input types, and the inputs could not be safely coerced to any supported types according to the casting rule ''safe''

In [74]:
import numpy as np

In [76]:
dataset5 = dataset.replace({True: 1, False: 0})

In [78]:
dataset5.to_csv('CleanData1_0.csv')

In [79]:
dataset5[dataset5['conciliatory'] == 1].to_csv('CleanData_conc.csv')
dataset5[dataset5['coercive'] == 1].to_csv('CleanData_coer.csv')

In [22]:
df_3

Unnamed: 0,acdid,year,location,gwno,region,epid,styear,endyear,epend,dcjdummy,...,epdum_govdcj,epdum_rebdcj,polity,regime,rebstrength,bdeadbes,bdeadchgrel,chgbdeadrel,outcome,conflterm
1,100,1966,Nigeria,475.0,4,100_1966,1966,1966,1,0,...,0,0,-7.0,3.0,4.0,20.0,,,4.0,2.0
2,100,2009,Nigeria,475.0,4,100_2009,2009,2009,0,3,...,1,0,4.0,2.0,1.0,,,,,
3,100,2011,Nigeria,475.0,4,100_2011,2011,2011,0,14,...,1,0,4.0,2.0,1.0,,,,,
4,101,1966,South Africa,560.0,4,101_1966-1988,1966,1988,0,0,...,1,0,4.0,2.0,2.0,,,,,
5,101,1967,South Africa,560.0,4,101_1966-1988,1966,1988,0,1,...,1,0,4.0,2.0,2.0,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1933,9,1949,Laos,812.0,3,9_1946-1953,1946,1953,0,0,...,0,0,,,1.0,,,,,
1934,9,1950,Laos,812.0,3,9_1946-1953,1946,1953,0,0,...,0,0,,,1.0,,,,,
1935,9,1951,Laos,812.0,3,9_1946-1953,1946,1953,0,0,...,0,0,,,1.0,,,,,
1936,9,1952,Laos,812.0,3,9_1946-1953,1946,1953,0,0,...,0,0,,,1.0,,,,,
