Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

# Natural Language Toolkit: Europarl Corpus Readers 

# 

# Copyright (C) 2001-2012 NLTK Project 

# Author:  Nitin Madnani <nmadnani@umiacs.umd.edu> 

# URL: <http://www.nltk.org/> 

# For license information, see LICENSE.TXT 

 

import re 

from .util import LazyCorpusLoader 

from .reader import * 

 

# One corpus loader per European language in the Europarl raw corpus.
# Each loader is built with the same reader class and the same
# encoding='utf-8' keyword; only the corpus root and the file pattern
# (which ends in the language's file extension) vary.
def _europarl(root, pattern):
    """Return a LazyCorpusLoader for one Europarl language corpus.

    ``root`` and ``pattern`` are forwarded unchanged as the first and
    third positional arguments of ``LazyCorpusLoader``.
    """
    return LazyCorpusLoader(
        root, EuroparlCorpusReader, pattern, encoding='utf-8')


danish = _europarl('europarl_raw/danish', r'ep-.*\.da')
dutch = _europarl('europarl_raw/dutch', r'ep-.*\.nl')
english = _europarl('europarl_raw/english', r'ep-.*\.en')
finnish = _europarl('europarl_raw/finnish', r'ep-.*\.fi')
french = _europarl('europarl_raw/french', r'ep-.*\.fr')
german = _europarl('europarl_raw/german', r'ep-.*\.de')
greek = _europarl('europarl_raw/greek', r'ep-.*\.el')
italian = _europarl('europarl_raw/italian', r'ep-.*\.it')
portuguese = _europarl('europarl_raw/portuguese', r'ep-.*\.pt')
spanish = _europarl('europarl_raw/spanish', r'ep-.*\.es')
swedish = _europarl('europarl_raw/swedish', r'ep-.*\.sv')

# The factory is only needed while the module is being initialised; remove
# it so the module's namespace holds exactly the language loaders.
del _europarl