# -------------------------------------------------------------------------------
#
# Name: Quizlet plugin for Anki 2.0
# Purpose: Import decks from Quizlet into Anki 2.0
# Author:
# - Original: (c) Rolph Recto 2012, last updated 12/06/2012
# https://github.com/rolph-recto/Anki-Quizlet
# - Also: Contributions from https://ankiweb.net/shared/info/1236400902
# - Current: JDMaybeMD
# Created: 04/07/2017
#
# Changelog: Initial release
# * 2023-04-02 parser improvements
# * 2023-02-26 partial shapes support
# * 2022-10-30 add a proxy retry
# * 2022-05-15 add a rich text support
# * 2022-05-12 custom media folder fix (thx, https://github.com/mhujer)
# * 2022-04-20 add an "Add reverse" option
# * 2022-04-18 fix issue with original audio
# * 2022-04-17 fix issue with images/audio
# * 2022-04-10 fix mapping algorithm (thx, https://github.com/mhujer)
# * 2020-09-10 update audio download algorithm
# * 2020-09-08 have fixed audio download for special decks :)
# * 2020-09-06 have fixed a partial import. shame on me :)
# * 2020-09-05 made an audio download optional
# * 2020-09-05 update a quizlet parser
# -------------------------------------------------------------------------------
#!/usr/bin/env python
import json
import os
import re
import urllib
import urllib.parse
import webbrowser
from operator import itemgetter

try:
    import urllib2
except Exception:
    import urllib.request as urllib2

import requests

from aqt import mw
from aqt.qt import *
from aqt.utils import showText
# Module-level reference to the plugin window; assigned in runQuizletPlugin().
__window = None
# Anki
# Silence urllib3 warnings (the requests calls in this file pass verify=False).
requests.packages.urllib3.disable_warnings()
# Browser-like User-Agent header sent with the HTTP requests made in this file.
headers = {
    "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36"
}
# add custom model if needed
def addCustomModel(name, col):
    """Return the "Basic Quizlet Extended" note type, creating it if missing.

    Args:
        name: Name passed by the caller. It is not used here: the model is
            always called "Basic Quizlet Extended".
        col: Collection object; only ``col.models`` is accessed.

    Returns:
        The model that already exists under that name, or the newly
        created and registered one.
    """
    model_name = "Basic Quizlet Extended"
    mm = col.models

    existing = mm.byName(model_name)
    if existing:
        return existing

    m = mm.new(model_name)

    # add fields
    for field_name in ("FrontText", "FrontAudio", "BackText", "BackAudio",
                       "Image", "Add Reverse"):
        mm.addField(m, mm.newField(field_name))

    # NOTE(review): in the copy under review the HTML between the "\n" pairs
    # of the four template strings was lost, leaving unterminated literals.
    # The separators below (<hr id=answer> before the answer side, <br><br>
    # elsewhere) are a reconstruction -- confirm against the upstream add-on.

    # front -> back card
    t = mm.newTemplate("Normal")
    t['qfmt'] = "{{FrontText}}\n<br><br>\n{{FrontAudio}}"
    t['afmt'] = ("{{FrontText}}\n<hr id=answer>\n{{BackText}}\n<br><br>"
                 "\n{{Image}}\n<br><br>\n{{BackAudio}}")
    mm.addTemplate(m, t)

    # back -> front card, only generated when "Add Reverse" is non-empty
    t = mm.newTemplate("Reverse")
    t['qfmt'] = ("{{#Add Reverse}}{{BackText}}\n<br><br>"
                 "\n{{BackAudio}}{{/Add Reverse}}")
    t['afmt'] = ("{{BackText}}\n<hr id=answer>\n{{FrontText}}\n<br><br>"
                 "\n{{FrontAudio}}\n{{Image}}")
    mm.addTemplate(m, t)

    mm.add(m)
    return m
# throw up a window with some info (used for testing)
def debug(message):
    """Show *message* in a QMessageBox information dialog titled "Message"."""
    parent = QWidget()
    QMessageBox.information(parent, "Message", message)
def getText(d, text=''):
    """Render a rich-text node (nested dicts) to an HTML string.

    Args:
        d: Node dict with a ``'type'`` key, or ``None``. ``'text'`` nodes
            carry ``'text'`` and optionally ``'marks'``; other nodes may
            carry a ``'content'`` list of child nodes.
        text: Fallback returned unchanged when ``d`` is ``None``.

    Returns:
        The HTML string for the node and its children.
    """
    if d is None:
        return text

    node_type = d['type']

    if node_type == 'text':
        text = d['text']
        for m in d.get('marks') or ():
            # simple formatting marks map 1:1 onto an HTML tag
            if m['type'] in ('b', 'i', 'u'):
                text = '<{0}>{1}</{0}>'.format(m['type'], text)
            # marks with attributes (e.g. class="bgY") wrap the text in a span
            if 'attrs' in m:
                attrs = " ".join('{}="{}"'.format(k, v)
                                 for k, v in m['attrs'].items())
                text = '<span {}>{}</span>'.format(attrs, text)
        return text

    # container node: concatenate the rendered children
    children = d.get('content')
    text = ''.join(getText(c) for c in children) if children else ''

    if node_type == 'paragraph':
        # NOTE(review): the paragraph suffix was an unterminated literal in
        # the copy under review; '<br>' is reconstructed -- confirm upstream.
        text = '{}<br>'.format(text)
    return text
def ankify(text):
    """Convert imported text into HTML suitable for a note field.

    Newlines become ``<br>`` and the ``bgY`` / ``bgB`` / ``bgP`` highlight
    classes are replaced by inline background-color styles.

    Args:
        text: Source string (plain text or HTML produced by ``getText``).

    Returns:
        The converted string.
    """
    # the '<br>' literal was broken across two lines in the copy under review
    text = text.replace('\n', '<br>')
    text = text.replace('class="bgY"', 'style="background-color:#fff4e5;"')
    text = text.replace('class="bgB"', 'style="background-color:#cde7fa;"')
    text = text.replace('class="bgP"', 'style="background-color:#fde8ff;"')
    return text
class QuizletWindow(QWidget):
    """Main window of the Quizlet plugin.

    Collects the deck URL and import options, runs a ``QuizletDownloader``
    thread to fetch the deck, and turns the downloaded items into notes.
    """

    def __init__(self):
        super(QuizletWindow, self).__init__()
        self.results = None
        self.thread = None
        self.initGUI()

    # create GUI skeleton
    def initGUI(self):
        """Build all widgets and layouts and show the window."""
        self.box_top = QVBoxLayout()
        self.box_upper = QHBoxLayout()

        # left side
        self.box_left = QVBoxLayout()
        self.check_boxes = QHBoxLayout()

        # optional pasted page html
        self.box_incoming_html = QHBoxLayout()
        self.box_incoming_html_left = QVBoxLayout()
        self.box_incoming_html_right = QHBoxLayout()
        self.value_incoming_html = QTextEdit("", self)
        self.value_incoming_html.setMinimumWidth(300)
        self.value_incoming_html.setPlaceholderText(
            "Enter page html if you constantly receive errors\n"
            "1.Enter the url\n"
            "2.Click on the 'Open page' button\n"
            "3.Right click, 'View page source'\n"
            "4.Copy the html\n"
            "5.If you don't need audio, uncheck the box\n")
        self.label_incoming_html = QLabel("Page html:")
        self.label_incoming_html.setMinimumWidth(98)
        self.button_html = QPushButton("Open html", self)
        self.button_html.clicked.connect(self.onHmtl)
        self.box_incoming_html_left.addWidget(self.label_incoming_html)
        self.box_incoming_html_left.addWidget(self.button_html)
        self.box_incoming_html_left.addStretch()
        self.box_incoming_html_right.addWidget(self.value_incoming_html)
        self.box_incoming_html.addLayout(self.box_incoming_html_left)
        self.box_incoming_html.addLayout(self.box_incoming_html_right)

        # quizlet url field
        self.box_name = QHBoxLayout()
        self.label_url = QLabel("Quizlet URL:")
        self.text_url = QLineEdit("", self)
        self.text_url.setMinimumWidth(300)
        self.text_url.setFocusPolicy(Qt.FocusPolicy.StrongFocus)
        self.text_url.setFocus()
        self.label_url.setMinimumWidth(100)
        self.box_name.addWidget(self.label_url)
        self.box_name.addWidget(self.text_url)

        # "download audio" option (checked by default via toggle())
        self.box_download_audio = QHBoxLayout()
        self.value_download_audio = QCheckBox("", self)
        self.value_download_audio.toggle()
        self.label_download_audio = QLabel("Download audio:")
        self.label_download_audio.setMinimumWidth(100)
        self.box_download_audio.addWidget(self.label_download_audio)
        self.box_download_audio.addWidget(self.value_download_audio)

        # "add reverse" option
        self.box_add_reverse = QHBoxLayout()
        self.value_add_reverse = QCheckBox("", self)
        self.label_add_reverse = QLabel("Add reverse:")
        self.box_add_reverse.addWidget(self.label_add_reverse)
        self.box_add_reverse.addWidget(self.value_add_reverse)

        # "skip errors" option
        self.box_skip_errors = QHBoxLayout()
        self.value_skip_errors = QCheckBox("", self)
        self.value_skip_errors.setToolTip(
            'Will skip audio/images download errors')
        self.label_skip_errors = QLabel("Skip errors:")
        self.label_skip_errors.setToolTip(
            'Will skip audio/images download errors')
        self.box_skip_errors.addWidget(self.label_skip_errors)
        self.box_skip_errors.addWidget(self.value_skip_errors)

        # start phrase
        self.box_start_phrase = QHBoxLayout()
        self.value_start_phrase = QLineEdit("", self)
        self.value_start_phrase.setMinimumWidth(300)
        self.value_start_phrase.setPlaceholderText(
            'Start from this phrase. Can be empty')
        self.label_start_phrase = QLabel("Start Phrase:")
        self.label_start_phrase.setMinimumWidth(100)
        self.box_start_phrase.addWidget(self.label_start_phrase)
        self.box_start_phrase.addWidget(self.value_start_phrase)

        # stop phrase
        self.box_stop_phrase = QHBoxLayout()
        self.value_stop_phrase = QLineEdit("", self)
        self.value_stop_phrase.setMinimumWidth(300)
        self.value_stop_phrase.setPlaceholderText(
            'Stop after this phrase. Can be empty')
        self.label_stop_phrase = QLabel("Stop Phrase:")
        self.label_stop_phrase.setMinimumWidth(100)
        self.box_stop_phrase.addWidget(self.label_stop_phrase)
        self.box_stop_phrase.addWidget(self.value_stop_phrase)

        # add layouts to left
        self.box_left.addLayout(self.box_name)
        self.box_left.addLayout(self.check_boxes)
        self.check_boxes.addLayout(self.box_download_audio)
        self.check_boxes.addLayout(self.box_add_reverse)
        self.check_boxes.addLayout(self.box_skip_errors)
        self.check_boxes.addStretch()
        self.box_left.addLayout(self.box_start_phrase)
        self.box_left.addLayout(self.box_stop_phrase)
        self.box_left.addLayout(self.box_incoming_html)

        # right side
        self.box_right = QVBoxLayout()

        # code (import set) button
        self.box_code = QHBoxLayout()
        self.button_code = QPushButton("Import Deck", self)
        self.box_code.addWidget(self.button_code)
        self.button_code.clicked.connect(self.onCode)

        # add layouts to right
        self.box_right.addLayout(self.box_code)
        self.box_right.addStretch()

        # add left and right layouts to upper
        self.box_upper.addLayout(self.box_left)
        self.box_upper.addSpacing(20)
        self.box_upper.addLayout(self.box_right)

        # results label
        self.label_results = QLabel(
            "\r\nExample: https://quizlet.com/150875612/usmle-flash-cards/")

        # add all widgets to top layout
        self.box_top.addLayout(self.box_upper)
        self.box_top.addWidget(self.label_results)
        self.box_top.addStretch(1)
        self.setLayout(self.box_top)

        # go, baby go!
        self.setMinimumWidth(600)
        self.setSizePolicy(QSizePolicy.Policy.Minimum,
                           QSizePolicy.Policy.Minimum)
        self.setWindowTitle("Improved Quizlet to Anki Importer")
        self.show()

    def onHmtl(self):
        """Open the deck's flashcards page in the system web browser."""
        quizletDeckID = self.getQuizletDeckID()
        if quizletDeckID is None:
            return
        webbrowser.open(
            "https://quizlet.com/{}/flashcards".format(quizletDeckID))

    def getQuizletDeckID(self):
        """Validate the URL field and extract the numeric deck ID.

        Returns:
            The first run of digits found in the URL path, as a string, or
            ``None`` after writing an explanation to the results label when
            the input is empty, is not a quizlet.com URL, or has no digits
            in its path.
        """
        # grab url input (surrounding whitespace from a paste is ignored)
        url = self.text_url.text().strip()
        if url == "":
            self.label_results.setText("Oops! You forgot the deck URL :(")
            return None

        # a URL typed without a scheme would be parsed as a bare path,
        # so prepend one before reading netloc/path
        parsed = urllib.parse.urlparse(url)
        if not parsed.scheme:
            parsed = urllib.parse.urlparse("https://" + url)

        # validate quizlet URL
        if "quizlet.com" not in parsed.netloc:
            self.label_results.setText("Oops! That's not a Quizlet URL :(")
            return None

        # validate and set Quizlet deck ID
        urlPath = parsed.path.strip("/")
        if urlPath == "":
            self.label_results.setText("Oops! Please use the full deck URL :(")
            return None

        # get first set of digits from url path
        digits = re.search(r"\d+", urlPath)
        if digits is None:
            self.label_results.setText(
                "Oops! No deck ID found in path {0} :(".format(urlPath))
            return None
        return digits.group(0)

    def onCode(self):
        """Download the deck named by the URL field and import it.

        The download runs in a ``QuizletDownloader`` thread while this method
        keeps the GUI responsive by pumping events. The import button is
        disabled for the duration because pumping events would otherwise let
        a second click re-enter this method and clear ``self.thread`` under
        the first call.
        """
        html = self.value_incoming_html.toPlainText()
        quizletDeckID = self.getQuizletDeckID()
        if quizletDeckID is None:
            return

        self.button_code.setEnabled(False)
        try:
            # and aaawaaaay we go...
            self.label_results.setText("Connecting to Quizlet...")

            # build URL
            deck_url = "https://quizlet.com/{}/flashcards".format(
                quizletDeckID)

            # download the data!
            thread = QuizletDownloader(self, deck_url, quizletDeckID, html)
            self.thread = thread
            thread.start()
            while not thread.isFinished():
                mw.app.processEvents()
                thread.wait(50)

            if thread.error:
                # error fetching data
                if thread.errorCode == 403:
                    if thread.errorCaptcha:
                        self.label_results.setText(
                            "Sorry, it's behind a captcha. Try to disable VPN")
                    else:
                        self.label_results.setText(
                            "Sorry, this is a private deck :(")
                elif thread.errorCode == 404:
                    self.label_results.setText(
                        "Can't find a deck with the ID {0}".format(
                            quizletDeckID))
                else:
                    self.label_results.setText("Unknown Error")
                    showText(thread.errorMessage
                             or "No error details available")
            else:
                # everything went through, let's roll!
                deck = thread.results
                self.label_results.setText(
                    ("Importing deck {0}...".format(deck["title"])))
                self.createDeck(deck)
                self.label_results.setText(
                    ("Success! Imported {0} ({1} cards)".format(
                        deck["title"], deck["term_count"])))
        finally:
            self.thread = None
            self.button_code.setEnabled(True)

    def createDeck(self, result):
        """Create (or reuse) the deck and add one note per selected item.

        Args:
            result: Dict with an ``'items'`` list and a title under
                ``'set'``, ``'studyable'`` or ``'title'``. Each item is a
                dict as produced by ``mapItems``. ``result['term_count']``
                is set to the number of notes actually added.
        """
        # create new deck and custom model
        if "set" in result:
            name = result['set']['title']
        elif "studyable" in result:
            name = result['studyable']['title']
        else:
            name = result['title']

        items = result['items']
        total = len(items)
        progress = 0
        result['term_count'] = total

        deck = mw.col.decks.get(mw.col.decks.id(name))
        model = addCustomModel(name, mw.col)

        # assign custom model to new deck
        mw.col.decks.select(deck["id"])
        mw.col.decks.save(deck)

        # assign new deck to custom model
        mw.col.models.setCurrent(model)
        model["did"] = deck["id"]
        mw.col.models.save(model)

        startPhrase = self.value_start_phrase.text()
        stopPhrase = self.value_stop_phrase.text()
        downloadAudio = self.value_download_audio.isChecked()
        addReverse = self.value_add_reverse.isChecked()

        # with no start phrase the import begins at the first item
        startProcess = startPhrase == ""

        for item in items:
            term = item["term"]
            definition = item["definition"]

            if not startProcess and startPhrase in (term, definition):
                startProcess = True

            if startProcess:
                note = mw.col.newNote()
                note["FrontText"] = ankify(term)
                note["BackText"] = ankify(definition)

                if item.get('termAudio') and downloadAudio:
                    file_name = self.fileDownloader(
                        self.getAudioUrl(item['termAudio']),
                        str(item["id"]) + "-front.mp3")
                    if file_name:
                        note["FrontAudio"] = "[sound:" + file_name + "]"

                if item.get('definitionAudio') and downloadAudio:
                    file_name = self.fileDownloader(
                        self.getAudioUrl(item["definitionAudio"]),
                        str(item["id"]) + "-back.mp3")
                    if file_name:
                        note["BackAudio"] = "[sound:" + file_name + "]"

                if item.get('imageUrl'):
                    file_name = self.fileDownloader(item["imageUrl"])
                    if file_name:
                        # NOTE(review): the markup in this literal was lost
                        # in the copy under review (it was an empty string);
                        # a plain <img> tag is reconstructed -- confirm.
                        note["Image"] += '<img src="{0}">'.format(file_name)

                mw.app.processEvents()

                if addReverse:
                    note["Add Reverse"] = "True"

                mw.col.addNote(note)
                progress += 1
                self.label_results.setText(
                    ("Imported {0}/{1}".format(progress, total)))
                mw.app.processEvents()

            # the item matching the stop phrase is the last one considered;
            # nothing after it was ever imported, so the loop can end here
            if stopPhrase != "" and stopPhrase in (term, definition):
                break

        # report the number of notes added rather than the deck size
        result['term_count'] = progress

        mw.col.reset()
        mw.reset()

    def getAudioUrl(self, word_audio):
        """Return a downloadable URL for an audio reference.

        Absolute ``http...`` URLs are returned unchanged; anything else is
        treated as a path on quizlet.com and routed through the proxy
        endpoint with the Quizlet URL percent-encoded.
        """
        if word_audio.startswith('http'):
            return word_audio
        quizlet_url = urllib.parse.quote(
            "https://quizlet.com/{0}".format(word_audio), safe='()*!\'')
        return "https://quizlet-proxy.proto.click/quizlet-media?url={0}".format(
            quizlet_url)

    # download the images
    def fileDownloader(self, url, suffix=''):
        """Download *url* into the collection's media folder.

        Args:
            url: File URL. Every ``_m`` substring is removed before the
                request is made.
            suffix: Optional file name suffix; when empty, the last path
                segment of the URL is used instead.

        Returns:
            The stored file name (``"quizlet-..."``), or ``None`` when the
            response status is not 200 or when the download failed and
            "Skip errors" is checked.

        Raises:
            urllib.error.URLError: (including ``HTTPError``) when the
                download fails and "Skip errors" is not checked.
        """
        skip_errors = self.value_skip_errors.isChecked()
        # NOTE(review): this strips "_m" anywhere in the URL, not only a
        # trailing size marker -- kept as-is; confirm this is intended.
        url = url.replace('_m', '')
        file_name = "quizlet-" + (suffix if suffix else url.split('/')[-1])

        try:
            # the context manager closes the HTTP response in every case
            with urllib2.urlopen(urllib2.Request(url, headers=headers)) as r:
                if r.getcode() != 200:
                    return None
                payload = r.read()
        except urllib2.URLError:
            # HTTPError is a URLError; connection failures are skipped too
            if skip_errors:
                return None
            raise

        with open(os.path.join(mw.col.media.dir(), file_name), 'wb') as f:
            f.write(payload)
        return file_name
def parseTextItem(item):
    """Render a text media entry via getText.

    Passes ``item["richText"]`` as the node and ``item["plainText"]`` as
    the fallback text.
    """
    rich = item["richText"]
    plain = item["plainText"]
    return getText(rich, plain)
def mapItems(studiableItems, setIdToDiagramImage):
    """Flatten studiable items into the dicts consumed by the importer.

    Args:
        studiableItems: Iterable of dicts with ``"id"``, ``"cardSides"``
            and, for diagram cards, ``"studiableContainerId"``. Each card
            side has a ``"label"`` and a ``"media"`` list whose entries are
            told apart by their numeric ``"type"``.
        setIdToDiagramImage: Mapping from container id (as a string) to a
            dict with a ``"url"``, or ``None``.

    Returns:
        A list with one dict per item, holding the keys ``id``, ``term``,
        ``termAudio``, ``definition``, ``definitionAudio`` and ``imageUrl``.
        Missing text is ``''``; missing audio/image is ``None``.
    """
    diagrams = setIdToDiagramImage or {}
    result = []

    for studiableItem in studiableItems:
        # every per-card value is reset here so nothing leaks from the
        # previous card when a side or medium is missing
        term = ''
        definition = ''
        image = None
        term_audio = None
        definition_audio = None

        for side in studiableItem["cardSides"]:
            label = side["label"]

            for media in side["media"]:
                media_type = media["type"]

                if label == "word":
                    if media_type == 4:
                        term_audio = media["url"]
                    if media_type == 1:
                        term = parseTextItem(media)
                        # text-to-speech is only a fallback for real audio
                        if media.get("ttsUrl") and term_audio is None:
                            term_audio = media["ttsUrl"]

                elif label == "definition":
                    if media_type == 4:
                        definition_audio = media["url"]
                    if media_type == 1:
                        definition = parseTextItem(media)
                        if media.get("ttsUrl") and definition_audio is None:
                            definition_audio = media["ttsUrl"]
                    if media_type == 2 and image is None:
                        image = media["url"]

                # partial shape support
                elif label == "location":
                    if media_type == 5 and image is None:
                        diagram = diagrams.get(
                            str(studiableItem["studiableContainerId"]))
                        if diagram:
                            image = diagram["url"]

        result.append({
            "id": studiableItem["id"],
            "term": term,
            "termAudio": term_audio,
            "definition": definition,
            "definitionAudio": definition_audio,
            "imageUrl": image,
        })

    return result
class QuizletDownloader(QThread):
    """Thread that downloads a deck from Quizlet.

    ``run`` first tries to parse the flashcards page (or the html pasted by
    the user) and falls back to the web API when that fails. On success
    ``results`` holds ``{'items': [...], 'title': str}``; on failure
    ``error`` is True and ``errorCode`` / ``errorCaptcha`` / ``errorMessage``
    describe the problem.
    """

    def __init__(self, window, url, quizletDeckID, html):
        super(QuizletDownloader, self).__init__()
        self.window = window
        self.url = url
        self.results = None
        self.html = html
        self.quizletDeckID = quizletDeckID
        self.error = False
        self.errorCode = None
        self.errorCaptcha = False
        self.errorReason = None
        self.errorMessage = None

    def getDataFromApi(self):
        """Fetch the deck through the Quizlet web API.

        On success the error state left by a previous attempt is cleared so
        callers see the fallback as a success. On failure ``errorCode`` is
        left untouched, so a 403/404 recorded by the page attempt is still
        reported.
        """
        # bound before the try so the except handler can always refer to it
        itemsResponse = None
        try:
            deckUrl = 'https://quizlet.com/webapi/3.9/sets/{0}'.format(
                self.quizletDeckID)
            # TODO download more than 1000 items
            itemsUrl = 'https://quizlet.com/webapi/3.9/studiable-item-documents?filters%5BstudiableContainerId%5D={0}&filters%5BstudiableContainerType%5D=1&perPage={1}&page=1'.format(
                self.quizletDeckID, 1000)
            deckResponse = requests.get(deckUrl, verify=False, headers=headers)
            itemsResponse = requests.get(
                itemsUrl, verify=False, headers=headers)

            # NOTE(review): assumes the API "models" object uses the same
            # keys as the page's studiableDocumentData -- confirm against a
            # real response.
            models = json.loads(itemsResponse.text)["responses"][0]["models"]
            studiableItems = models.get(
                'studiableItems', models.get('studiableItem'))
            if not studiableItems:
                raise Exception("Can't extract data")
            items = mapItems(studiableItems,
                             models.get('setIdToDiagramImage'))

            title = json.loads(deckResponse.text)["responses"][
                0]['models']['set'][0]['title']

            self.results = {'items': items, 'title': title}
            self.error = False
            self.errorCode = None
            self.errorCaptcha = False
            self.errorMessage = None
        except Exception as e:
            self.error = True
            self.errorMessage = "{}\n-----------------\n{}".format(
                e, itemsResponse.text if itemsResponse is not None else '')

    def _loadCookies(self):
        """Return the request cookies configured for the add-on (may be {})."""
        config = mw.addonManager.getConfig(__name__) or {}
        if config.get("qlts"):
            return {"qlts": config["qlts"]}
        if config.get("cookies"):
            from http.cookies import SimpleCookie
            jar = SimpleCookie()
            jar.load(config["cookies"])
            return {key: morsel.value for key, morsel in jar.items()}
        return {}

    def _fetchPageHtml(self, direct):
        """Return pasted html if any, else download the page.

        ``direct`` selects the deck URL itself; otherwise the proxy is used.
        """
        if self.html:
            return self.html
        if direct:
            url = self.url
        else:
            url = 'https://quizlet-proxy.proto.click/quizlet-deck?url=' + \
                urllib.parse.quote(self.url, safe='()*!\'')
        r = requests.get(url, verify=False, headers=headers,
                         cookies=self._loadCookies())
        r.raise_for_status()
        return r.text

    @staticmethod
    def _extractStudiableData(page_html):
        """Return ``(studiableItems, setIdToDiagramImage)`` found in the page.

        Tries the ``window.Quizlet[...]`` payloads first, then the
        ``dehydratedReduxStateKey`` payload.
        """
        for key in ('setPageData', 'assistantModeData', 'cardsModeData'):
            regex = re.escape('window.Quizlet["{0}"] = '.format(key))
            regex += r'(.+?)'
            regex += re.escape('; QLoad("Quizlet.{0}");'.format(key))
            m = re.search(regex, page_html)
            if not m:
                continue
            data = json.loads(m.group(1).strip())
            studiableDocumentData = data.get('studiableDocumentData')
            if not studiableDocumentData:
                # payload without studiable data; try the next format
                continue
            return (
                studiableDocumentData.get(
                    'studiableItems',
                    studiableDocumentData.get('studiableItem')),
                studiableDocumentData.get('setIdToDiagramImage', None),
            )

        regex = re.escape('dehydratedReduxStateKey":')
        regex += r'(.+?)'
        regex += re.escape('},"__N_SSP')
        m = re.search(regex, page_html)
        if not m:
            raise Exception("Can't extract data")
        # the captured value is a JSON string that itself contains JSON
        data = json.loads(json.loads(m.group(1).strip()))
        studiableData = data["studyModesCommon"]["studiableData"]
        return (studiableData["studiableItems"],
                studiableData.get("setIdToDiagramImage"))

    def _extractTitle(self, page_html):
        """Return the deck title from the page's <title>, with a fallback."""
        title = os.path.basename(self.url.strip()) or "Quizlet Flashcards"
        # NOTE(review): the <title> tags were missing from this pattern in
        # the copy under review (it matched the page's first character);
        # reconstructed -- confirm against upstream.
        m = re.search(r'<title>(.+?)</title>', page_html)
        if m:
            title = m.group(1)
            title = re.sub(r' \| Quizlet$', '', title)
            title = re.sub(r'^Flashcards ', '', title)
            title = re.sub(r'\s+', ' ', title)
            title = title.strip()
        return title

    def getDataFromPage(self):
        """Extract the deck from the flashcards page html.

        The first attempt requests the deck URL directly; on failure one
        more attempt is made through the proxy before the error state is
        recorded.
        """
        # True while the direct attempt is still pending / in progress;
        # False once the proxy attempt has been selected
        proxyRetry = True
        while True:
            # bound before the try so the except handler can always use it
            page_html = ''
            try:
                page_html = self._fetchPageHtml(direct=proxyRetry)

                regex = re.escape('window.Quizlet["setPasswordData"]')
                if re.search(regex, page_html):
                    if proxyRetry:
                        proxyRetry = False
                        continue
                    self.error = True
                    self.errorCode = 403
                    return

                studiableItems, setIdToDiagramImage = \
                    self._extractStudiableData(page_html)
                if not studiableItems:
                    raise Exception("Can't extract data")

                self.results = {}
                self.results['items'] = mapItems(
                    studiableItems, setIdToDiagramImage)
                self.results['title'] = self._extractTitle(page_html)
            except requests.HTTPError as e:
                if proxyRetry:
                    proxyRetry = False
                    continue
                self.error = True
                self.errorCode = e.response.status_code
                self.errorMessage = e.response.text
                if "CF-Chl-Bypass" in e.response.headers:
                    self.errorCaptcha = True
            except ValueError as e:
                if proxyRetry:
                    proxyRetry = False
                    continue
                self.error = True
                self.errorMessage = "Invalid json1: {0}".format(e)
            except Exception as e:
                # pasted html cannot change between attempts, so no retry
                if proxyRetry and not self.html:
                    proxyRetry = False
                    continue
                self.error = True
                self.errorMessage = "{}\n-----------------\n{}".format(
                    e, page_html)
            break

    # yep, we got it
    def run(self):
        """Thread entry point: parse the page, fall back to the web API."""
        self.getDataFromPage()
        if self.error:
            self.getDataFromApi()
# plugin was called from Anki
def runQuizletPlugin():
    """Create the importer window and store it in the module-level handle."""
    # the window is stored in a module global rather than a local variable
    global __window
    __window = QuizletWindow()
# create menu item in Anki
# Runs at import time: adds an "Import from Quizlet" action to mw's Tools
# menu and connects it to runQuizletPlugin.
action = QAction("Import from Quizlet", mw)
action.triggered.connect(runQuizletPlugin)
mw.form.menuTools.addAction(action)