# Recipe metadata: schema version, display title, author, and human-readable
# summary/description of what the recipe does.
version: 2
title: 'Ingest Wikipedia Page Create stream'
contributor: 'https://github.com/landon9720'
summary: >-
  Consume events about new Wikipedia pages to build a time series reified graph
# Literal block (trailing newline stripped) so the paragraph break before the
# data-source link is preserved.
description: |-
  Wikipedia page creation events are instantiated in the graph with relationships to a reified time model.
  Additionally, page creation event comments are echoed to standard output.

  Data source documentation: https://stream.wikimedia.org/?doc#/streams/get_v2_stream_page_create

ingestStreams:
  # Reads the Wikimedia "page-create" event stream over Server-Sent Events.
  # Each event is decoded as JSON and exposed to the query below as $that.
  - name: wikipedia-page-create
    source:
      type: ServerSentEvent
      url: https://stream.wikimedia.org/v2/stream/page-create
      format:
        type: Json
    # The query resolves three nodes per event (revision, wiki database,
    # performer), labels them, links the revision to reified time nodes, and
    # copies the event payload onto the nodes.
    #
    # The performer node id falls back to performer.user_text when
    # performer.user_id is missing (the stream's schema documents user_id as
    # absent for anonymous performers). Without the fallback every anonymous
    # performer would resolve to the single node idFrom("id", null) and keep
    # overwriting each other's properties and label. Ids of performers that do
    # have a user_id are unchanged.
    query: |-
      MATCH (revNode), (dbNode), (userNode)
      WHERE id(revNode) = idFrom("revision", $that.rev_id)
        AND id(dbNode) = idFrom("db", $that.database)
        AND id(userNode) = idFrom(
          "id",
          coalesce($that.performer.user_id, $that.performer.user_text)
        )

      // Set labels for nodes //
      CALL create.setLabels(revNode, ["rev:" + $that.page_title])
      CALL create.setLabels(dbNode, ["db:" + $that.database])
      CALL create.setLabels(userNode, ["user:" + $that.performer.user_text])

      // Create timeNode nodes to provide year/month/day/hour/minute/second bucketing and counting of revNodes //
      CALL reify.time(datetime($that.rev_timestamp), ["year", "month", "day", "hour", "minute", "second"]) YIELD node AS timeNode
      CALL incrementCounter(timeNode, "count", 1) YIELD count AS timeNodeCount

      // Set properties for nodes //
      SET revNode = $that,
          revNode.type = "rev"

      SET dbNode.database = $that.database,
          dbNode.type = "db"

      SET userNode = $that.performer,
          userNode.type = "user"

      // Create edges between nodes //
      CREATE (revNode)-[:DB]->(dbNode),
             (revNode)-[:BY]->(userNode),
             (revNode)-[:AT]->(timeNode)

standingQueries:
  # Watches for any node that has a `comment` property and prints that comment.
  - name: comment-output
    pattern:
      type: Cypher
      mode: DistinctId
      # Emits the id of each matching node.
      query: |-
        MATCH (n)
        WHERE n.comment IS NOT NULL
        RETURN DISTINCT id(n) AS id
    outputs:
      - name: output-1
        # NOTE(review): presumably InlineData flattens the match payload so the
        # enrichment query can read the id as $that.id - confirm against the
        # recipe format documentation.
        preEnrichmentTransformation:
          type: InlineData
        # Looks the matched node up again by id and returns its comment as
        # `line`.
        resultEnrichment:
          parameter: that
          query: |-
            MATCH (n)
            WHERE id(n) = $that.id
            RETURN n.comment AS line
        destinations:
          - type: StandardOut

# No custom node appearances or quick queries are defined for this recipe.
nodeAppearances: []
quickQueries: []

# Starter queries, one per kind of node the ingest query writes. Each value is
# a folded scalar, so it reaches the consumer as a single line of Cypher.
sampleQueries:
  # Time nodes are selected by the presence of a `period` property.
  - name: Show time nodes
    query: >
      MATCH (n) WHERE n.period IS NOT NULL RETURN n
  # The remaining queries select on the `type` property set during ingest.
  - name: Show revision nodes
    query: >
      MATCH (n) WHERE n.type = "rev" RETURN n
  - name: Show database nodes
    query: >
      MATCH (n) WHERE n.type = "db" RETURN n
  - name: Show user nodes
    query: >
      MATCH (n) WHERE n.type = "user" RETURN n
