# Recipe: ingest the Wikimedia "page-create" server-sent-event stream and build
# a graph of revision, database, user and reified time nodes. A standing query
# echoes each page-creation comment to standard output.
version: 1
title: Ingest Wikipedia Page Create stream
contributor: https://github.com/landon9720
summary: Consume events about new Wikipedia pages to build a time series reified graph
description: |-
  Wikipedia page creation events are instantiated in the graph with relationships to a reified time model.
  Additionally, page creation event comments are echoed to standard output.

  Data source documentation: https://stream.wikimedia.org/?doc#/streams/get_v2_stream_page_create

# One ingest stream: every SSE event is parsed as JSON and bound to $that in
# the Cypher query below.
ingestStreams:
  - type: ServerSentEventsIngest
    url: https://stream.wikimedia.org/v2/stream/page-create
    format:
      type: CypherJson
      # NOTE: Cypher `//` comments run to end of line, so every statement in
      # this query must stay on its own line.
      query: |-
        // Resolve the three nodes by deterministic IDs derived from event fields.
        // The user key falls back to user_text when user_id is missing, so that
        // performers without a user_id do not all collapse onto one shared node.
        // (Presumably anonymous performers carry no user_id; verify against the
        // stream schema.)
        MATCH (revNode), (dbNode), (userNode)
        WHERE id(revNode) = idFrom("revision", $that.rev_id)
          AND id(dbNode) = idFrom("db", $that.database)
          AND id(userNode) = idFrom("id", coalesce($that.performer.user_id, $that.performer.user_text))

        // Set labels for nodes
        CALL create.setLabels(revNode, ["rev:" + $that.page_title])
        CALL create.setLabels(dbNode, ["db:" + $that.database])
        CALL create.setLabels(userNode, ["user:" + $that.performer.user_text])

        // Create timeNode to provide year/month/day/hour/minute/second bucketing
        // and counting of revNodes
        CALL reify.time(datetime($that.rev_timestamp), ["year", "month", "day", "hour", "minute", "second"]) YIELD node AS timeNode
        CALL incrementCounter(timeNode, "count", 1) YIELD count AS timeNodeCount

        // Set properties for nodes. `SET node = map` replaces all properties,
        // so the `type` marker is assigned after it in the same clause.
        SET revNode = $that,
            revNode.type = "rev"

        SET dbNode.database = $that.database,
            dbNode.type = "db"

        SET userNode = $that.performer,
            userNode.type = "user"

        // Create edges between nodes
        CREATE (revNode)-[:DB]->(dbNode),
               (revNode)-[:BY]->(userNode),
               (revNode)-[:AT]->(timeNode)

# Standing query: match any node that has a `comment` property, then look the
# node up by id and print its comment.
standingQueries:
  - pattern:
      type: Cypher
      query: |-
        MATCH (n)
        WHERE n.comment IS NOT NULL
        RETURN DISTINCT id(n) AS id
    outputs:
      output-1:
        type: CypherQuery
        query: |-
          MATCH (n)
          WHERE id(n) = $that.data.id
          RETURN n.comment AS line
        andThen:
          type: PrintToStandardOut

nodeAppearances: []
quickQueries: []

# Sample queries. The last three select nodes by the `type` property written by
# the ingest query; the first selects nodes that carry a `period` property
# (presumably set by reify.time; verify).
sampleQueries:
  - name: Show time nodes
    query: >
      MATCH (n)
      WHERE n.period IS NOT NULL
      RETURN n
  - name: Show revision nodes
    query: >
      MATCH (n)
      WHERE n.type = "rev"
      RETURN n
  - name: Show database nodes
    query: >
      MATCH (n)
      WHERE n.type = "db"
      RETURN n
  - name: Show user nodes
    query: >
      MATCH (n)
      WHERE n.type = "user"
      RETURN n