# This script was created using the following tools: # - Python3 # - nltk (Natural Language Toolkit), installed using pip3 # - From the nltk, wordnet and omw (Open Multilingual Wordnet)

from nltk.corpus import wordnet as wn import json import sys

# Everything that can be obtained from the WordNet interface: # - Synonyms: words contained within the same WordNet synset (example: ocean → sea) # - Hypernyms: “Kind of” (example: gondola → boat) # - Hyponyms: “More general than” (example: boat → gondola) # - Holonyms: “Comprises” (example: car → accelerator) # - Meronyms: “Part of” (example: trunk → tree) # - pertainym: “Of or pertaining to” # - Derivationally related forms: Terms in different syntactic categories that have the same root form and are semantically related.

# Configuration langKey = “spa” withTypeOfRelation = False

# Function definitions

# Obtain a list of synonyms from a synset, translate the Synset to a # language specific lemma and then obtain its name only if it is a single word # Input: array of Synset elements # Output set of strings containing the translated words def synonyms(synList):

relatedWords = set()
for relatedWord in synList:
    for lemma in relatedWord.lemmas(lang=langKey):
        name = lemma.name()
        if not '_' in name:
            relatedWords.add(lemma.name())
return relatedWords

# Obtain a list of related words from a lemma list. # First it translates the lemma to its language agnostic synset and then # back to a language specific lemma. # Input: array of Lemma elements # Output set of strings containing the related translated words def get_related_lemmas(lemmaList):

relatedLemmas = set()
for lemma in lemmaList:
    for langLemma in lemma.synset().lemmas(lang=langKey):
        name = langLemma.name()
        if not '_' in name:
            relatedLemmas.add(lemma.name())
return relatedLemmas

def format_synonyms(wordList, relationType):

results = []
for word in wordList:
    newWord = {
        'word': word,
        'relation': relationType,
        'score' : 100
    }
    results.append(newWord)
return results

# Read input and initalize variables wordList = []

argumentText = sys.argv fileInput = False if '.' in argumentText and argumentText != '.':

# Then a file is going to be read to get the list of words
fileInput = True
wordFile = open(argumentText, 'r')
for word in wordFile.read().split('\n'):
    wordList.append(word)
wordFile.close()

else:

# Otherwise assume the input is a single word to be processed
wordList.append(argumentText)

wordRelations = {} newWordList = set()

# Process each word and write its relations to the final file for word in wordList:

lemmas = wn.lemmas(word, lang=langKey);
synsets = wn.synsets(word, lang=langKey);
relatedWords = [] 
relatedWords += format_synonyms(synonyms(synsets), 'synonym')
for synset in synsets:
    relatedWords += format_synonyms(synonyms(synset.hyponyms()), "hyponym")
    relatedWords += format_synonyms(synonyms(synset.hypernyms()), "hypernym")
    relatedWords += format_synonyms(synonyms(synset.member_holonyms()), "holonym")
    relatedWords += format_synonyms(synonyms(synset.part_meronyms()), "meronym")
    relatedWords += format_synonyms(synonyms(synset.substance_meronyms()), "meronym")
for lemma in lemmas:
    relatedWords += format_synonyms(get_related_lemmas(lemma.derivationally_related_forms()), "derivationally")
    relatedWords += format_synonyms(get_related_lemmas(lemma.pertainyms()), "pertainym")
wordRelations[word] = relatedWords
newWordList.add(word)
for relatedWord in relatedWords:
    newWordList.add(relatedWord['word'])

# Build output as JSON jsonData = {

'words': list(sorted(newWordList)),
'relations': {}

} for key, value in wordRelations.items():

jsonData['relations'][key] = list(map(dict, frozenset(frozenset(i.items()) for i in value)))

print(json.dumps(jsonData))