// Package garabic provides a set of functions for Arabic text processing in Go.
package garabic

import (
	"bytes"
	"fmt"
	"strings"
	"unicode"

	"golang.org/x/text/runes"
	"golang.org/x/text/transform"
)

// letterGroup holds one letter of a word together with the runes directly
// before and after it. A zero backLetter or frontLetter means there is no
// neighbour on that side; adjustLetter uses this to pick the contextual form.
type letterGroup struct {
	backLetter  rune // rune preceding letter in the word, or 0 at the start
	letter      rune // the letter being shaped
	frontLetter rune // rune following letter in the word, or 0 at the end
}

// letterShape lists the code points used for the four contextual forms of one
// Arabic letter: standing alone (Independent), opening a connected run
// (Initial), between two connected letters (Medial) and closing a connected
// run (Final).
// See https://web.stanford.edu/dept/lc/arabic/alphabet/incontextletters.html
type letterShape struct {
	Independent, Initial, Medial, Final rune
}

// arabicAlphabetShapes maps a letter to the code points of its contextual
// forms. Keys are the base letters U+0621..U+064A (including the tatweel) and
// the four lam-alef ligatures; the values are mostly code points from the
// Arabic Presentation Forms-B block. Where an entry repeats its own key for a
// form, that form is emitted as the unshaped base code point.
var arabicAlphabetShapes = map[rune]letterShape{
	// Alef with hamza above
	'\u0623': {Independent: '\uFE83', Initial: '\u0623', Medial: '\uFE84', Final: '\uFE84'},
	// Alef
	'\u0627': {Independent: '\uFE8D', Initial: '\u0627', Medial: '\uFE8E', Final: '\uFE8E'},
	// Alef with madda above
	'\u0622': {Independent: '\uFE81', Initial: '\u0622', Medial: '\uFE82', Final: '\uFE82'},
	// Hamza
	'\u0621': {Independent: '\uFE80', Initial: '\u0621', Medial: '\u0621', Final: '\u0621'},
	// Waw with hamza above
	'\u0624': {Independent: '\uFE85', Initial: '\u0624', Medial: '\uFE86', Final: '\uFE86'},
	// Alef with hamza below
	'\u0625': {Independent: '\uFE87', Initial: '\u0625', Medial: '\uFE88', Final: '\uFE88'},
	// Yeh with hamza above
	'\u0626': {Independent: '\uFE89', Initial: '\uFE8B', Medial: '\uFE8C', Final: '\uFE8A'},
	// Beh
	'\u0628': {Independent: '\uFE8F', Initial: '\uFE91', Medial: '\uFE92', Final: '\uFE90'},
	// Teh
	'\u062A': {Independent: '\uFE95', Initial: '\uFE97', Medial: '\uFE98', Final: '\uFE96'},
	// Teh marbuta
	'\u0629': {Independent: '\uFE93', Initial: '\u0629', Medial: '\u0629', Final: '\uFE94'},
	// Theh
	'\u062B': {Independent: '\uFE99', Initial: '\uFE9B', Medial: '\uFE9C', Final: '\uFE9A'},
	// Jeem
	'\u062C': {Independent: '\uFE9D', Initial: '\uFE9F', Medial: '\uFEA0', Final: '\uFE9E'},
	// Hah
	'\u062D': {Independent: '\uFEA1', Initial: '\uFEA3', Medial: '\uFEA4', Final: '\uFEA2'},
	// Khah
	'\u062E': {Independent: '\uFEA5', Initial: '\uFEA7', Medial: '\uFEA8', Final: '\uFEA6'},
	// Dal
	'\u062F': {Independent: '\uFEA9', Initial: '\u062F', Medial: '\uFEAA', Final: '\uFEAA'},
	// Thal
	'\u0630': {Independent: '\uFEAB', Initial: '\u0630', Medial: '\uFEAC', Final: '\uFEAC'},
	// Reh
	'\u0631': {Independent: '\uFEAD', Initial: '\u0631', Medial: '\uFEAE', Final: '\uFEAE'},
	// Zain
	'\u0632': {Independent: '\uFEAF', Initial: '\u0632', Medial: '\uFEB0', Final: '\uFEB0'},
	// Seen
	'\u0633': {Independent: '\uFEB1', Initial: '\uFEB3', Medial: '\uFEB4', Final: '\uFEB2'},
	// Sheen
	'\u0634': {Independent: '\uFEB5', Initial: '\uFEB7', Medial: '\uFEB8', Final: '\uFEB6'},
	// Sad
	'\u0635': {Independent: '\uFEB9', Initial: '\uFEBB', Medial: '\uFEBC', Final: '\uFEBA'},
	// Dad
	'\u0636': {Independent: '\uFEBD', Initial: '\uFEBF', Medial: '\uFEC0', Final: '\uFEBE'},
	// Tah
	'\u0637': {Independent: '\uFEC1', Initial: '\uFEC3', Medial: '\uFEC4', Final: '\uFEC2'},
	// Zah
	'\u0638': {Independent: '\uFEC5', Initial: '\uFEC7', Medial: '\uFEC8', Final: '\uFEC6'},
	// Ain
	'\u0639': {Independent: '\uFEC9', Initial: '\uFECB', Medial: '\uFECC', Final: '\uFECA'},
	// Ghain
	'\u063A': {Independent: '\uFECD', Initial: '\uFECF', Medial: '\uFED0', Final: '\uFECE'},
	// Feh
	'\u0641': {Independent: '\uFED1', Initial: '\uFED3', Medial: '\uFED4', Final: '\uFED2'},
	// Qaf
	'\u0642': {Independent: '\uFED5', Initial: '\uFED7', Medial: '\uFED8', Final: '\uFED6'},
	// Kaf
	'\u0643': {Independent: '\uFED9', Initial: '\uFEDB', Medial: '\uFEDC', Final: '\uFEDA'},
	// Lam
	'\u0644': {Independent: '\uFEDD', Initial: '\uFEDF', Medial: '\uFEE0', Final: '\uFEDE'},
	// Meem
	'\u0645': {Independent: '\uFEE1', Initial: '\uFEE3', Medial: '\uFEE4', Final: '\uFEE2'},
	// Noon
	'\u0646': {Independent: '\uFEE5', Initial: '\uFEE7', Medial: '\uFEE8', Final: '\uFEE6'},
	// Heh
	'\u0647': {Independent: '\uFEE9', Initial: '\uFEEB', Medial: '\uFEEC', Final: '\uFEEA'},
	// Waw
	'\u0648': {Independent: '\uFEED', Initial: '\u0648', Medial: '\uFEEE', Final: '\uFEEE'},
	// Yeh
	'\u064A': {Independent: '\uFEF1', Initial: '\uFEF3', Medial: '\uFEF4', Final: '\uFEF2'},
	// Alef maksura (dotless yeh)
	'\u0649': {Independent: '\uFEEF', Initial: '\u0649', Medial: '\uFEF0', Final: '\uFEF0'},
	// Tatweel: identical in every position
	'\u0640': {Independent: '\u0640', Initial: '\u0640', Medial: '\u0640', Final: '\u0640'},
	// Lam-alef ligature
	'\uFEFB': {Independent: '\uFEFB', Initial: '\uFEFB', Medial: '\uFEFC', Final: '\uFEFC'},
	// Lam-alef ligature with hamza above
	'\uFEF7': {Independent: '\uFEF7', Initial: '\uFEF7', Medial: '\uFEF8', Final: '\uFEF8'},
	// Lam-alef ligature with hamza below
	'\uFEF9': {Independent: '\uFEF9', Initial: '\uFEF9', Medial: '\uFEFA', Final: '\uFEFA'},
	// Lam-alef ligature with madda above
	'\uFEF5': {Independent: '\uFEF5', Initial: '\uFEF5', Medial: '\uFEF6', Final: '\uFEF6'},
}

// normalizable is the set of code points that normalizeTransform strips from
// text: the tatweel and the Arabic diacritics (harakat / tashkeel).
var normalizable = &unicode.RangeTable{
	R16: []unicode.Range16{
		// Tatweel (U+0640).
		{Lo: 0x0640, Hi: 0x0640, Stride: 1},
		// Eight consecutive marks: tanwin fathah, tanwin dammah, tanwin
		// kasrah, fathah, dammah, kasrah, shaddah and sukun.
		{Lo: 0x064B, Hi: 0x0652, Stride: 1},
		// Dagger alif (U+0670).
		{Lo: 0x0670, Hi: 0x0670, Stride: 1},
	},
}

// Letters involved in normalization. Normalize folds AlefMad, AlefHamzaAbove,
// AlefHamzaBelow and AlefWaslah into Alef, DotlessYae into Yae and TehMarbuta
// into Hae; RemoveHarakat only replaces AlefWaslah with Alef.
const (
	// Alef is the bare alef (ا).
	Alef = '\u0627'
	// AlefMad is alef with madda above (آ).
	AlefMad = '\u0622'
	// AlefHamzaAbove is alef with hamza above (أ).
	AlefHamzaAbove = '\u0623'
	// AlefHamzaBelow is alef with hamza below (إ).
	AlefHamzaBelow = '\u0625'
	// Yae is the dotted yeh (ي).
	Yae = '\u064A'
	// DotlessYae is the dotless yeh, alef maksura (ى).
	DotlessYae = '\u0649'
	// TehMarbuta is teh marbuta (ة).
	TehMarbuta = '\u0629'
	// Hae is heh (ه).
	Hae = '\u0647'
	// AlefWaslah is alef wasla (ٱ). This package treats the wasla sign like a
	// diacritic: both RemoveHarakat and Normalize replace it with Alef.
	AlefWaslah = '\u0671'
)

// Arabic number words used by SpellNumber.

// _zeroToNine[d] is the word for the single digit d (0..9).
var _zeroToNine = []string{
	"صفر", "واحد", "اثنان", "ثلاثة", "أربعة",
	"خمسة", "ستة", "سبعة", "ثمانية", "تسعة",
}

// _elevenToNineteen[u] is the word for 10+u. Despite the name the table
// starts at ten (index 0) and ends at nineteen (index 9).
var _elevenToNineteen = []string{
	"عشرة", "أحد عشر", "اثنا عشر", "ثلاثة عشر", "أربعة عشر",
	"خمسة عشر", "ستة عشر", "سبعة عشر", "ثمانية عشر", "تسعة عشر",
}

// _tens[t] is the word for t*10 for t in 2..9; indexes 0 and 1 are empty
// placeholders so the tens digit can be used as the index directly.
var _tens = []string{
	"", "", "عشرون", "ثلاثون", "أربعون", "خمسون",
	"ستون", "سبعون", "ثمانون", "تسعون",
}
// _hundreds[h] is the word for h*100 for h in 1..9; index 0 is an empty
// placeholder.
var _hundreds = []string{
	"", "مئة", "مئتان", "ثلاثمئة", "أربعمئة", "خمسمئة", "ستمئة", "سبعمئة", "ثمانمئة", "تسعمئة",
}
// _scaleNumbers[i] names the i-th group of three digits counted from the
// right: units (no word), thousand, million, billion.
var _scaleNumbers = []string{
	"", "ألف", "مليون", "مليار",
}

// RemoveHarakat returns input without its diacritics (harakat) and tatweel,
// and with every alef wasla replaced by a plain alef.
func RemoveHarakat(input string) string {
	letters := []rune(normalizeTransform(input))
	for idx, letter := range letters {
		// The wasla sign is treated like a diacritic, so only the bare alef
		// is kept.
		if letter == AlefWaslah {
			letters[idx] = Alef
		}
	}
	return string(letters)
}

// Normalize prepares Arabic text for searching and indexing: diacritics and
// tatweel are removed, every alef variant becomes a plain alef, the dotless
// yeh becomes a dotted yeh and teh marbuta becomes heh.
func Normalize(input string) string {
	folded := []rune(normalizeTransform(input))
	for idx, letter := range folded {
		switch letter {
		case DotlessYae:
			folded[idx] = Yae
		case TehMarbuta:
			folded[idx] = Hae
		case AlefWaslah, AlefHamzaBelow, AlefHamzaAbove, AlefMad:
			folded[idx] = Alef
		}
	}
	// TODO: this could operate on bytes instead of runes, since every letter
	// handled here is a two-byte UTF-8 sequence.
	return string(folded)
}

// Use text/transform algorithm for faster normalization
func normalizeTransform(input string) string {
	//Use text/transform algorithm for faster normalization
	tm := transform.Chain(runes.Remove(runes.In(normalizable)))
	input, _, _ = transform.String(tm, input)
	return input
}

// deleteRune removes the rune at index i and returns the shortened slice; the
// remaining runes keep their relative order. The removal is done in place, so
// the backing array of the argument is modified. An index outside the slice
// (negative, or >= len(runes)) leaves it untouched.
func deleteRune(runes []rune, i int) []rune {
	// A negative index used to reach the slice expression below and panic.
	if i < 0 || i >= len(runes) {
		return runes
	}
	return append(runes[:i], runes[i+1:]...)
}

// SpellNumber spells input out in Arabic words.
//
// Negative numbers are prefixed with "سالب". The number is read in groups of
// three digits, most significant first; each group is followed by its scale
// word (thousand, million, billion) and the parts are joined with " و ". A
// group equal to one is expressed by the scale word alone (1000 is "ألف").
// Values beyond the largest scale word spell the leading part as a multiple of
// that scale (10^12 is "ألف مليار").
//
// The words come straight from the lookup tables: scale words are not
// inflected for dual or plural and no gender agreement is applied.
func SpellNumber(input int) string {
	// Work on the unsigned magnitude: negating the most negative int would
	// overflow and stay negative.
	magnitude := uint64(input)
	sign := ""
	if input < 0 {
		sign = "سالب "
		magnitude = uint64(-(input+1)) + 1
	}

	if magnitude < 10 {
		return sign + _zeroToNine[magnitude]
	}
	return sign + spellMagnitude(magnitude)
}

// spellMagnitude spells a positive number as scale groups joined with " و ".
func spellMagnitude(n uint64) string {
	// Split n into groups of three digits, least significant first. Only as
	// many groups as there are scale words are split off; whatever remains is
	// kept whole as the count of the largest scale.
	top := len(_scaleNumbers) - 1
	var groups []uint64
	for ; n > 0 && len(groups) < top; n /= 1000 {
		groups = append(groups, n%1000)
	}
	if n > 0 {
		groups = append(groups, n)
	}

	var parts []string
	for i := len(groups) - 1; i >= 0; i-- {
		group, scale := groups[i], _scaleNumbers[i]
		switch {
		case group == 0:
			// An empty group contributes nothing.
		case i > 0 && group == 1:
			// "ألف" rather than "واحد ألف".
			parts = append(parts, scale)
		case scale == "":
			parts = append(parts, spellGroup(group))
		case group >= 1000:
			// Only the topmost group can exceed three digits.
			parts = append(parts, fmt.Sprintf("%s %s", spellMagnitude(group), scale))
		default:
			parts = append(parts, fmt.Sprintf("%s %s", spellGroup(group), scale))
		}
	}
	return strings.Join(parts, " و ")
}

// spellGroup spells a number in 1..999: hundreds first, then the remainder
// with the units digit before the tens ("واحد و عشرون").
func spellGroup(group uint64) string {
	hundreds, tens, units := group/100, group/10%10, group%10

	var parts []string
	if hundreds > 0 {
		parts = append(parts, _hundreds[hundreds])
	}
	switch {
	case tens == 1:
		parts = append(parts, _elevenToNineteen[units])
	case tens == 0 && units > 0:
		parts = append(parts, _zeroToNine[units])
	case tens > 1 && units > 0:
		parts = append(parts, fmt.Sprintf("%s و %s", _zeroToNine[units], _tens[tens]))
	case tens > 1:
		parts = append(parts, _tens[tens])
	}
	return strings.Join(parts, " و ")
}

// Tashkeel will add matching diacritics to arabic text
func Tashkeel(input string) string {
	JarrWords := []string{"من", "الي", "عن", "على", "مذ", "خلا", "عدا", "حاشا"}
	words := strings.Fields(input)
	for i, word := range words {
		// يُجَرُّ الاسم إذا سُبِق بأحد حروف جرٍّ، مثل كلمة الشركة في جملة: توجّهْتُ إلى الشركةِ
		fmt.Println(Normalize(word))
		if contains(JarrWords, Normalize(word)) {
			words[i+1] += string('\u0650')
		}
	}
	return strings.Join(words, " ")
}

// contains reports whether str is an element of s.
func contains(s []string, str string) bool {
	for idx := 0; idx < len(s); idx++ {
		if s[idx] == str {
			return true
		}
	}
	return false
}

// Shape prepares text for display by a renderer that does no Arabic shaping of
// its own. The input is cut into runs of Arabic and non-Arabic characters;
// every Arabic word is passed through shapeWord, which selects the contextual
// letter forms and reverses the letters. Non-Arabic runs are kept as they
// are, apart from trimming surrounding whitespace. Finally the order of all
// pieces is reversed and they are joined with single spaces.
func Shape(input string) string {
	var sections []string
	var run strings.Builder
	runIsArabic := false

	// flush closes the current run, if any, and stores it as a section.
	flush := func() {
		if run.Len() > 0 {
			sections = append(sections, strings.TrimSpace(run.String()))
			run.Reset()
		}
	}

	for _, letter := range input {
		// Whitespace is not in the Arabic block, so it always ends an Arabic
		// run; an Arabic run therefore never holds more than one word.
		if isArabic := IsArabicLetter(letter); isArabic != runIsArabic {
			flush()
			runIsArabic = isArabic
		}
		run.WriteRune(letter)
	}
	flush()

	var shaped []string
	for _, section := range sections {
		if !IsArabic(section) {
			shaped = append(shaped, section)
			continue
		}
		// A whitespace-only run was trimmed to "", which yields no words.
		for _, word := range strings.Fields(section) {
			shaped = append(shaped, shapeWord(word))
		}
	}

	// Reverse the order of the pieces.
	for i, j := 0, len(shaped)-1; i < j; i, j = i+1, j-1 {
		shaped[i], shaped[j] = shaped[j], shaped[i]
	}
	return strings.Join(shaped, " ")
}

// shapeWord replaces every letter of an Arabic word with the contextual form
// that matches its neighbours and returns the result with its runes reversed.
// Input that is not purely Arabic is returned unchanged.
//
// Diacritics and tatweel are ignored while choosing the forms and are put
// back at their original positions afterwards.
func shapeWord(input string) string {
	if !IsArabic(input) {
		return input
	}

	// Shape the bare letters: marks must not count as neighbours.
	letters := []rune(RemoveHarakat(input))
	shaped := make([]rune, len(letters))
	for i, letter := range letters {
		if _, known := arabicAlphabetShapes[letter]; !known {
			shaped[i] = letter
			continue
		}
		// A zero rune stands for "no neighbour" at the word boundary.
		var backLetter, frontLetter rune
		if i > 0 {
			backLetter = letters[i-1]
		}
		if i < len(letters)-1 {
			frontLetter = letters[i+1]
		}
		shaped[i] = adjustLetter(letterGroup{backLetter: backLetter, letter: letter, frontLetter: frontLetter})
	}

	// Nothing was stripped: the shaped letters are the whole word.
	original := []rune(input)
	if len(original) == len(shaped) {
		return reverse(string(shaped))
	}

	// Put the stripped marks back. Whether a rune is a mark is decided by
	// asking RemoveHarakat itself, so this loop always consumes exactly one
	// shaped letter per rune that survived the stripping above. Deciding by
	// table membership instead misaligns the two sequences (tatweel is in the
	// table but stripped; alef wasla is kept but not in the table).
	var restored bytes.Buffer
	next := 0
	for _, r := range original {
		if next < len(shaped) && RemoveHarakat(string(r)) != "" {
			r = shaped[next]
			next++
		}
		restored.WriteRune(r)
	}
	return reverse(restored.String())
}

// reverse returns s with the order of its runes reversed, which is how this
// package produces right-to-left text for rendering.
func reverse(s string) string {
	forward := []rune(s)
	backward := make([]rune, len(forward))
	for idx, r := range forward {
		backward[len(forward)-1-idx] = r
	}
	return string(backward)
}

// adjustLetter returns the contextual form of g.letter given its neighbours:
// Medial when it connects on both sides, Initial when it only connects to the
// following letter, Final when it only connects to the preceding letter and
// Independent otherwise.
//
// A neighbour connects only if it is a letter of arabicAlphabetShapes. On top
// of that, the preceding letter must not be one that never connects forward
// (see isAlwaysInitial) and the following letter must not be a hamza. A zero
// neighbour means the word boundary.
func adjustLetter(g letterGroup) rune {
	// The stand-alone hamza connects to neither of its neighbours.
	const hamza = '\u0621'

	shape := arabicAlphabetShapes[g.letter]

	// Digits, punctuation and other runes missing from the table are left
	// unshaped by shapeWord, so the letter must not reach out to them.
	_, backKnown := arabicAlphabetShapes[g.backLetter]
	_, frontKnown := arabicAlphabetShapes[g.frontLetter]
	joinsBack := backKnown && !isAlwaysInitial(g.backLetter)
	joinsFront := frontKnown && g.frontLetter != hamza

	switch {
	case joinsBack && joinsFront:
		return shape.Medial
	case joinsFront:
		return shape.Initial
	case joinsBack:
		return shape.Final
	default:
		return shape.Independent
	}
}

// isAlwaysInitial reports whether letter is one of the thirteen letters after
// which adjustLetter starts a new connected run, i.e. the next letter is not
// joined to it.
func isAlwaysInitial(letter rune) bool {
	switch letter {
	case '\u0627', '\u0623', '\u0622', '\u0625', '\u0649', '\u0621', '\u0624',
		'\u0629', '\u062f', '\u0630', '\u0631', '\u0632', '\u0648':
		return true
	}
	return false
}

// IsArabicLetter reports whether ch lies in the Unicode Arabic block
// (U+0600..U+06FF). Despite the name, this also covers the digits,
// punctuation and marks of that block, not only letters.
func IsArabicLetter(ch rune) bool {
	const (
		arabicBlockFirst rune = 0x0600
		arabicBlockLast  rune = 0x06FF
	)
	return arabicBlockFirst <= ch && ch <= arabicBlockLast
}

// IsArabic reports whether input consists solely of whitespace and characters
// accepted by IsArabicLetter. An empty string counts as Arabic.
func IsArabic(input string) bool {
	for _, r := range input {
		if unicode.IsSpace(r) || IsArabicLetter(r) {
			continue
		}
		return false
	}
	return true
}

// ToArabicDigits replaces every ASCII digit 0-9 in input with the matching
// Arabic-Indic digit (U+0660..U+0669); all other text is left untouched.
func ToArabicDigits(input string) string {
	// Replacement pairs in the order "0"->"٠", "1"->"١", ..., "9"->"٩".
	pairs := make([]string, 0, 20)
	for d := rune(0); d < 10; d++ {
		pairs = append(pairs, string('0'+d), string('\u0660'+d))
	}
	return strings.NewReplacer(pairs...).Replace(input)
}

// ToEnglishDigits replaces every Arabic-Indic digit (U+0660..U+0669) in input
// with the matching ASCII digit 0-9; all other text is left untouched.
func ToEnglishDigits(input string) string {
	// Replacement pairs in the order "٠"->"0", "١"->"1", ..., "٩"->"9".
	pairs := make([]string, 0, 20)
	for d := rune(0); d < 10; d++ {
		pairs = append(pairs, string('\u0660'+d), string('0'+d))
	}
	return strings.NewReplacer(pairs...).Replace(input)
}