This recipe takes sample movie and rating data, builds a graph, and alerts when an actor is also the director of a movie. It features ingest from multiple sources (CSV files).
Download these files to same directory as Quine.
java \
-jar quine-x.x.x.jar -r movieData \
--recipe-value movie_file=movieData.csv \
--recipe-value rating_file=ratingData.csv
# Recipe schema version (currently only supported value is 1;)
version: 1
# Identifies the Recipe but is not necessarily unique or immutable
title: Ingesting CSV Files
# URL to social profile of the person or organization responsible for this Recipe
contributor: https://github.com/maglietti
# Brief copy about this Recipe
summary: |-
This recipe takes sample movie and rating data, builds a graph, and alerts
when an actor is also the director of a movie.
# Longer form copy about this Recipe
description: |-
This recipe is part of the Quine Ingest Stream blog series. This time, we'll
work with `CSV` data exported from IMDb to answer the question; *"Which actors
have acted in and directed the same movie?"*
# Ingest Streams connect to data sources and establish how data is processed and transformed
ingestStreams:
# INGEST-1
- type: FileIngest
path: $movie_file
format:
type: CypherCsv
headers: true
query: |-
WITH $that AS row
MATCH (m)
WHERE row.Entity = 'Movie'
AND id(m) = idFrom("Movie", row.movieId)
SET
m:Movie,
m.tmdbId = row.tmdbId,
m.imdbId = row.imdbId,
m.imdbRating = toFloat(row.imdbRating),
m.released = row.released,
m.title = row.title,
m.year = toInteger(row.year),
m.poster = row.poster,
m.runtime = toInteger(row.runtime),
m.countries = split(coalesce(row.countries,""), "|"),
m.imdbVotes = toInteger(row.imdbVotes),
m.revenue = toInteger(row.revenue),
m.plot = row.plot,
m.url = row.url,
m.budget = toInteger(row.budget),
m.languages = split(coalesce(row.languages,""), "|"),
m.movieId = row.movieId
WITH m,split(coalesce(row.genres,""), "|") AS genres
UNWIND genres AS genre
WITH m, genre
MATCH (g)
WHERE id(g) = idFrom("Genre", genre)
SET g.genre = genre, g:Genre
MERGE (m:Movie)-[:IN_GENRE]->(g:Genre)
# INGEST-2
- type: FileIngest
path: $movie_file
format:
type: CypherCsv
headers: true
query: |-
WITH $that AS row
MATCH (p)
WHERE row.Entity = "Person"
AND id(p) = idFrom("Person", row.tmdbId)
SET
p:Person,
p.imdbId = row.imdbId,
p.bornIn = row.bornIn,
p.name = row.name,
p.bio = row.bio,
p.poster = row.poster,
p.url = row.url,
p.born = row.born,
p.died = row.died,
p.tmdbId = row.tmdbId,
p.born = CASE row.born WHEN "" THEN null ELSE datetime(row.born + "T00:00:00Z") END,
p.died = CASE row.died WHEN "" THEN null ELSE datetime(row.died + "T00:00:00Z") END
# INGEST-3
- type: FileIngest
path: $movie_file
format:
type: CypherCsv
headers: true
query: |-
WITH $that AS row
WITH row
WHERE row.Entity = "Join"
AND row.Work = "Acting"
MATCH (p), (m), (r)
WHERE id(p) = idFrom("Person", row.tmdbId)
AND id(m) = idFrom("Movie", row.movieId)
AND id(r) = idFrom("Role", row.tmdbId, row.movieId, row.role)
SET
r.role = row.role,
r.movie = row.movieId,
r.tmdbId = row.tmdbId,
r:Role
MERGE (p:Person)-[:PLAYED]->(r:Role)<-[:HAS_ROLE]-(m:Movie)
MERGE (p:Person)-[:ACTED_IN]->(m:Movie)
# INGEST-4
- type: FileIngest
path: $movie_file
format:
type: CypherCsv
headers: true
query: |-
WITH $that AS row
WITH row WHERE row.Entity = "Join" AND row.Work = "Directing"
MATCH (p), (m)
WHERE id(p) = idFrom("Person", row.tmdbId)
AND id(m) = idFrom("Movie", row.movieId)
MERGE (p:Person)-[:DIRECTED]->(m:Movie)
# INGEST-5
- type: FileIngest
path: $rating_file
format:
type: CypherCsv
headers: true
query: |-
WITH $that AS row
MATCH (m), (u), (rtg)
WHERE id(m) = idFrom("Movie", row.movieId)
AND id(u) = idFrom("User", row.userId)
AND id(rtg) = idFrom("Rating", row.movieId, row.userId, row.rating)
SET u.name = row.name, u:User
SET rtg.rating = row.rating,
rtg.timestamp = toInteger(row.timestamp),
rtg:Rating
MERGE (u:User)-[:SUBMITTED]->(rtg:Rating)<-[:HAS_RATING]-(m:Movie)
MERGE (u:User)-[:RATED]->(m:Movie)
# Standing Queries define how data is transformed and output.
standingQueries:
- pattern:
type: Cypher
mode: MultipleValues
query: |-
MATCH (a:Movie)<-[:ACTED_IN]-(p:Person)-[:DIRECTED]->(m:Movie)
WHERE id(a) = id(m)
RETURN id(m) as movieId, m.title as Movie, id(p) as personId, p.name as Actor
outputs:
set-ActedDirected:
type: CypherQuery
query: |-
MATCH (m),(p)
WHERE strId(m) = $that.data.movie
AND strId(p) = $that.data.person
MERGE (p:Person)-[:ActedDirected]->(m:Movie)
log-actor-director:
type: WriteToFile
path: "ActorDirector.jsonl"
# Customize node appearance in web UI.
nodeAppearances:
- predicate:
dbLabel: Movie
propertyKeys: []
knownValues: {}
icon: ion-android-film
label:
key: title
type: Property
- predicate:
dbLabel: Person
propertyKeys: []
knownValues: {}
icon: ion-android-person
color: "#ffd700"
label:
key: name
type: Property
- predicate:
dbLabel: Role
propertyKeys: []
knownValues: {}
icon: ion-android-microphone
color: "#7CFC00"
label:
key: role
type: Property
- predicate:
dbLabel: User
propertyKeys: []
knownValues: {}
icon: ion-android-chat
color: "#7e7e7e"
label:
key: name
type: Property
- predicate:
dbLabel: Genre
propertyKeys: []
knownValues: {}
icon: ion-android-menu
color: "#00FFFF"
label:
key: genre
type: Property
- predicate:
dbLabel: Rating
propertyKeys: []
knownValues: {}
icon: ion-android-star
color: "#9932CC"
label:
key: rating
type: Property
# Add queries to node context menus in web UI
quickQueries: []
# Customize sample queries listed in web UI
sampleQueries:
- name: Sample of Nodes
query: match(n) return n limit 10
- name: Count Nodes
query: MATCH (n) RETURN distinct labels(n), count(*)
- name: Count Relationships
query: MATCH (n)-[r]->() RETURN type(r), count(*)
- name: Movie Genres
query: match (g:Genre) return g
- name: Person Acted In a movie
query: MATCH (p:Person)-[:ACTED_IN]->(m:Movie) RETURN *
- name: Person Directed a movie
query: MATCH (p:Person)-[:DIRECTED]-(m:Movie) RETURN *
- name: Person Acted In and Directed a movie
query: MATCH (p:Person)-[:ActedDirected]->(m:Movie) RETURN *
- name: User Rated a movie
query: MATCH (u:User)-[:RATED]-(m:Movie) RETURN *