# DataFrame Conversions

This notebook demonstrates how to convert Resources to a pandas DataFrame and vice versa.

In [1]:
from kgforge.core import KnowledgeGraphForge

In [2]:
# Create the forge session from the demo configuration file.
# The path is relative, so it resolves against the kernel's working directory.
forge = KnowledgeGraphForge("../../configurations/demo-forge.yml")

## Imports

In [3]:
from kgforge.core import Resource

In [4]:
import pandas as pd
import numpy as np

## List of Resources to DataFrame

In [5]:
# Postal address that is nested inside the `jane` resource below.
address = Resource(
    type="PostalAddress",
    country="Switzerland",
    locality="Geneva",
)

In [6]:
# Person with a nested address; the email holds the "(missing)" placeholder string.
jane = Resource(
    id="33532569-70eb-4648-a7f1-f7ea22b0ce38",
    type="Person",
    name="Jane Doe",
    address=address,
    email="(missing)",
)

In [7]:
# Person without an address, but with an actual email value.
john = Resource(
    id="45e018f4-9ade-4ad0-bdcf-63902bf51cc1",
    type="Person",
    name="John Smith",
    email="john.smith@epfl.ch",
)

In [8]:
# The list of resources converted to a DataFrame in the cells below.
persons = [jane, john]

In [9]:
# Register both resources in one call; the report printed below gives the
# number of resources, the action performed and whether it succeeded.
forge.register(persons)

<count> 2
<action> _register_one
<succeeded> True


In [10]:
# Show the resource with its nested address.
print(jane)

{
    id: 33532569-70eb-4648-a7f1-f7ea22b0ce38
    type: Person
    address:
    {
        type: PostalAddress
        country: Switzerland
        locality: Geneva
    }
    email: (missing)
    name: Jane Doe
}


In [11]:
# Show the resource that has no address.
print(john)

{
    id: 45e018f4-9ade-4ad0-bdcf-63902bf51cc1
    type: Person
    email: john.smith@epfl.ch
    name: John Smith
}


In [12]:
# Inspect the store metadata attached to the resource (version and deprecation flag in the output below).
# NOTE(review): presumably set by the registration above - confirm.
print(john._store_metadata)

{'version': 1, 'deprecated': False}


In [13]:
# Default conversion: per the output below, nested properties become
# dot-separated columns (e.g. address.country) and absent values are left empty.
forge.as_dataframe(persons)

Unnamed: 0,id,type,address.type,address.country,address.locality,email,name
0,33532569-70eb-4648-a7f1-f7ea22b0ce38,Person,PostalAddress,Switzerland,Geneva,(missing),Jane Doe
1,45e018f4-9ade-4ad0-bdcf-63902bf51cc1,Person,,,,john.smith@epfl.ch,John Smith


In [14]:
# With na="(missing)": per the output below, the email cell that held
# "(missing)" is now empty, like the other absent values.
forge.as_dataframe(persons, na="(missing)")

Unnamed: 0,id,type,address.type,address.country,address.locality,email,name
0,33532569-70eb-4648-a7f1-f7ea22b0ce38,Person,PostalAddress,Switzerland,Geneva,,Jane Doe
1,45e018f4-9ade-4ad0-bdcf-63902bf51cc1,Person,,,,john.smith@epfl.ch,John Smith


In [15]:
# With nesting="__": per the output below, nested property columns are
# joined with "__" (address__country) instead of ".".
forge.as_dataframe(persons, nesting="__")

Unnamed: 0,id,type,address__type,address__country,address__locality,email,name
0,33532569-70eb-4648-a7f1-f7ea22b0ce38,Person,PostalAddress,Switzerland,Geneva,(missing),Jane Doe
1,45e018f4-9ade-4ad0-bdcf-63902bf51cc1,Person,,,,john.smith@epfl.ch,John Smith


In [16]:
# With expanded=True: per the output below, the columns are @id, @type and
# schema:name, and the identifiers are shown as file:// URIs.
forge.as_dataframe(persons, expanded=True)

Unnamed: 0,@id,@type,schema:name
0,file:///Users/agarcia/Developments/kgforge/exa...,schema:Person,Jane Doe
1,file:///Users/agarcia/Developments/kgforge/exa...,schema:Person,John Smith


In [17]:
# With store_metadata=True: per the output below, the `deprecated` and
# `version` columns are appended to the default columns.
forge.as_dataframe(persons, store_metadata=True)

Unnamed: 0,id,type,address.type,address.country,address.locality,email,name,deprecated,version
0,33532569-70eb-4648-a7f1-f7ea22b0ce38,Person,PostalAddress,Switzerland,Geneva,(missing),Jane Doe,False,1
1,45e018f4-9ade-4ad0-bdcf-63902bf51cc1,Person,,,,john.smith@epfl.ch,John Smith,False,1


## DataFrame to list of Resources

In [18]:
# One flat record per person, with the same columns as the default
# `forge.as_dataframe(persons)` output shown earlier (dot-separated keys
# for the nested address properties).
jane_record = {
    "id": "33532569-70eb-4648-a7f1-f7ea22b0ce38",
    "type": "Person",
    "address.type": "PostalAddress",
    "address.country": "Switzerland",
    "address.locality": "Geneva",
    "email": "(missing)",
    "name": "Jane Doe",
}

# John has no address: the three address columns are NaN.
john_record = {
    "id": "45e018f4-9ade-4ad0-bdcf-63902bf51cc1",
    "type": "Person",
    "address.type": np.nan,
    "address.country": np.nan,
    "address.locality": np.nan,
    "email": "john.smith@epfl.ch",
    "name": "John Smith",
}

data = pd.DataFrame([jane_record, john_record])

In [19]:
# Display the DataFrame that is converted to resources below.
data

Unnamed: 0,id,type,address.type,address.country,address.locality,email,name
0,33532569-70eb-4648-a7f1-f7ea22b0ce38,Person,PostalAddress,Switzerland,Geneva,(missing),Jane Doe
1,45e018f4-9ade-4ad0-bdcf-63902bf51cc1,Person,,,,john.smith@epfl.ch,John Smith


In [20]:
# Convert the DataFrame rows to resources using the default options.
resources = forge.from_dataframe(data)

In [21]:
# Hand-built address for the expected resources compared with the conversion result below.
address = Resource(
    type="PostalAddress",
    country="Switzerland",
    locality="Geneva",
)

In [22]:
# Hand-built expected resource matching the first row of `data`.
jane = Resource(
    id="33532569-70eb-4648-a7f1-f7ea22b0ce38",
    type="Person",
    name="Jane Doe",
    address=address,
    email="(missing)",
)

In [23]:
# Hand-built expected resource matching the second row of `data`.
john = Resource(
    id="45e018f4-9ade-4ad0-bdcf-63902bf51cc1",
    type="Person",
    name="John Smith",
    email="john.smith@epfl.ch",
)

In [24]:
# Expected result of the DataFrame conversion, in row order.
persons = [jane, john]

In [25]:
# Check that the resources converted from the DataFrame equal the hand-built ones.
resources == persons

True

In [26]:
# With na="(missing)": per the printed result below, the first resource
# has no `email` property, since its cell held "(missing)".
resources_na = forge.from_dataframe(data, na="(missing)")

In [27]:
# Default conversion, for comparison: the email "(missing)" is kept.
print(resources[0])

{
    id: 33532569-70eb-4648-a7f1-f7ea22b0ce38
    type: Person
    address:
    {
        type: PostalAddress
        country: Switzerland
        locality: Geneva
    }
    email: (missing)
    name: Jane Doe
}


In [28]:
# Conversion with na="(missing)": the email property is absent.
print(resources_na[0])

{
    id: 33532569-70eb-4648-a7f1-f7ea22b0ce38
    type: Person
    address:
    {
        type: PostalAddress
        country: Switzerland
        locality: Geneva
    }
    name: Jane Doe
}


In [29]:
# With nesting="__": the columns of `data` are dot-separated, and per the
# printed result below they are not nested but kept as flat properties
# such as `address.country`.
resources_nesting = forge.from_dataframe(data, nesting="__")

In [30]:
# Default conversion, for comparison: the address columns form a nested resource.
print(resources[0])

{
    id: 33532569-70eb-4648-a7f1-f7ea22b0ce38
    type: Person
    address:
    {
        type: PostalAddress
        country: Switzerland
        locality: Geneva
    }
    email: (missing)
    name: Jane Doe
}


In [31]:
# Conversion with nesting="__": the dotted column names stay flat properties.
print(resources_nesting[0])

{
    id: 33532569-70eb-4648-a7f1-f7ea22b0ce38
    type: Person
    address.country: Switzerland
    address.locality: Geneva
    address.type: PostalAddress
    email: (missing)
    name: Jane Doe
}
