SQL Nodes
using FunSQL
using FunSQL:
Agg, Append, As, Asc, Bind, CrossJoin, Define, Desc, Fun, From, Get,
Group, Highlight, Iterate, Join, LeftJoin, Limit, Lit, Order, Over,
Partition, SQLNode, SQLTable, Select, Sort, Var, Where, With,
WithExternal, ID, render
We start with specifying the database model.
const concept =
SQLTable(:concept, columns = [:concept_id, :vocabulary_id, :concept_code, :concept_name])
const location =
SQLTable(:location, columns = [:location_id, :city, :state])
const person =
SQLTable(:person, columns = [:person_id, :gender_concept_id, :year_of_birth, :month_of_birth, :day_of_birth, :birth_datetime, :location_id])
const visit_occurrence =
SQLTable(:visit_occurrence, columns = [:visit_occurrence_id, :person_id, :visit_start_date, :visit_end_date])
const measurement =
SQLTable(:measurement, columns = [:measurement_id, :person_id, :measurement_concept_id, :measurement_date])
const observation =
SQLTable(:observation, columns = [:observation_id, :person_id, :observation_concept_id, :observation_date])
In FunSQL, a SQL query is generated from a tree of SQLNode
objects. The nodes are created using constructors with familiar SQL names and connected together using the chain (|>
) operator.
q = From(person) |>
Where(Fun.">"(Get.year_of_birth, 2000)) |>
Select(Get.person_id)
#-> (…) |> Select(…)
Displaying a SQLNode
object shows how it was constructed.
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3
end
=#
Each node wraps a concrete node object, which can be accessed using the indexing operator.
q[]
#-> ((…) |> Select(…))[]
display(q[])
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3[]
end
=#
The SQL query is generated using the function render()
.
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#
Ill-formed queries are detected.
q = From(person) |> Agg.count() |> Select(Get.person_id)
render(q)
#=>
ERROR: FunSQL.IllFormedError in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Agg.count() |> Select(Get.person_id)
q2
end
=#
q = From(person) |> Fun.current_date()
#=>
ERROR: FunSQL.RebaseError in:
Fun.current_date()
=#
@funsql
The @funsql
macro provides alternative notation for specifying FunSQL queries.
q = @funsql begin
from(person)
filter(year_of_birth > 2000)
select(person_id)
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3
end
=#
We can combine @funsql
notation with regular Julia code.
q = @funsql begin
from(person)
$(Where(Get.year_of_birth .> 2000))
select(person_id)
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3
end
=#
q = From(:person) |>
@funsql(filter(year_of_birth > 2000)) |>
Select(Get.person_id)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3
end
=#
The @funsql
notation allows us to encapsulate query fragments into query functions.
@funsql adults() = from(person).filter(2020 - year_of_birth >= 16)
display(@funsql adults())
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun.">="(Fun."-"(2020, Get.year_of_birth), 16))
q2
end
=#
Query functions defined with @funsql
can accept parameters.
@funsql concept_by_code(v, c) =
begin
from(concept)
filter(vocabulary_id == $v && concept_code == $c)
end
display(@funsql concept_by_code("SNOMED", "22298006"))
#=>
let q1 = From(:concept),
q2 = q1 |>
Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
Fun."="(Get.concept_code, "22298006")))
q2
end
=#
Query functions support ...
notation.
@funsql concept_by_code(v, cs...) =
begin
from(concept)
filter(vocabulary_id == $v && in(concept_code, $(cs...)))
end
display(@funsql concept_by_code("Visit", "IP", "ER"))
#=>
let q1 = From(:concept),
q2 = q1 |>
Where(Fun.and(Fun."="(Get.vocabulary_id, "Visit"),
Fun.in(Get.concept_code, "IP", "ER")))
q2
end
=#
Query functions support keyword arguments and default values.
@funsql age(yob = year_of_birth; at = fun(`EXTRACT(YEAR FROM CURRENT_DATE) `)) =
($at - $yob)
q = @funsql begin
from(person)
define(
age => age(),
age_in_2000 => age(at = 2000))
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |>
Define(Fun."-"(Fun."EXTRACT(YEAR FROM CURRENT_DATE) "(),
Get.year_of_birth) |>
As(:age),
Fun."-"(2000, Get.year_of_birth) |> As(:age_in_2000))
q2
end
=#
A parameter of a query function accepts a type declaration.
@funsql concept(c::String, v::String = "SNOMED") =
concept_by_code($v, $c)
@funsql concept(id::Int) =
from(concept).filter(concept_id == $id)
display(@funsql concept("22298006"))
#=>
let q1 = From(:concept),
q2 = q1 |>
Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
Fun."="(Get.concept_code, "22298006")))
q2
end
=#
display(@funsql concept(4329847))
#=>
let q1 = From(:concept),
q2 = q1 |> Where(Fun."="(Get.concept_id, 4329847))
q2
end
=#
A single @funsql
macro can wrap multiple definitions.
@funsql begin
SNOMED(codes...) = concept_by_code("SNOMED", $(codes...))
`MYOCARDIAL INFARCTION`() = SNOMED("22298006")
end
display(@funsql `MYOCARDIAL INFARCTION`())
#=>
let q1 = From(:concept),
q2 = q1 |>
Where(Fun.and(Fun."="(Get.vocabulary_id, "SNOMED"),
Fun."="(Get.concept_code, "22298006")))
q2
end
=#
A query function may have a docstring.
@funsql begin
"SNOMED concept set with the given `codes`"
SNOMED
"Visit concept set with the given `codes`"
Visit(codes...) = concept_by_code("Visit", $(codes...))
end
@doc funsql_SNOMED
#-> SNOMED concept set with the given `codes`
@doc funsql_Visit
#-> Visit concept set with the given `codes`
An ill-formed @funsql
query triggers an error.
@funsql for p in person; end
#=>
ERROR: LoadError: FunSQL.TransliterationError: ill-formed @funsql notation:
quote
for p = person
end
end
in expression starting at …
=#
Literals
A SQL value is created with Lit()
constructor.
e = Lit("SQL is fun!")
#-> Lit("SQL is fun!")
In a SELECT
clause, bare literal expressions get an alias "_"
.
q = Select(e)
print(render(q))
#=>
SELECT 'SQL is fun!' AS "_"
=#
Values of certain Julia data types are automatically converted to SQL literals when they are used in the context of a SQL node.
using Dates
q = Select("null" => missing,
"boolean" => true,
"integer" => 42,
"text" => "SQL is fun!",
"date" => Date(2000))
Such plain literals could also be used in @funsql
notation.
q = @funsql select(null => missing,
boolean => true,
integer => 42,
text => "SQL is fun!",
date => $(Date(2000)))
display(q)
#=>
Select(missing |> As(:null),
true |> As(:boolean),
42 |> As(:integer),
"SQL is fun!" |> As(:text),
Dates.Date("2000-01-01") |> As(:date))
=#
Attributes
To reference a table attribute, we use the Get
constructor.
e = Get(:person_id)
#-> Get.person_id
Alternatively, use shorthand notation.
Get.person_id
#-> Get.person_id
Get."person_id"
#-> Get.person_id
Get[:person_id]
#-> Get.person_id
Get["person_id"]
#-> Get.person_id
Hierarchical notation is supported.
e = Get.p.person_id
#-> Get.p.person_id
Get.p |> Get.person_id
#-> Get.p.person_id
In the context where a SQL node is expected, a bare symbol is automatically converted to a reference.
q = Select(:person_id)
display(q)
#-> Select(Get.person_id)
@funsql
macro translates an identifier to a symbol. In suitable context, this symbol will be translated to a column reference.
@funsql person_id
#-> :person_id
@funsql
notation supports hierarchical references.
@funsql p.person_id
#-> Get.p.person_id
Use backticks to represent a name that is not a valid identifier.
@funsql `person_id`
#-> :person_id
@funsql `p`.`person_id`
#-> Get.p.person_id
Get
can also create bound references.
q = From(person)
e = Get(over = q, :year_of_birth)
#-> (…) |> Get.year_of_birth
display(e)
#=>
let person = SQLTable(:person, …),
q1 = From(person)
q1.year_of_birth
end
=#
q.person_id
#-> (…) |> Get.person_id
q."person_id"
#-> (…) |> Get.person_id
q[:person_id]
#-> (…) |> Get.person_id
q["person_id"]
#-> (…) |> Get.person_id
Get
is used for dereferencing an alias created with As
.
q = From(person) |>
As(:p) |>
Select(Get.p.person_id)
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#
This is particularly useful when you need to disambiguate the output of Join
.
q = From(person) |>
As(:p) |>
Join(From(location) |> As(:l),
on = Get.p.location_id .== Get.l.location_id) |>
Select(Get.p.person_id, Get.l.state)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"location_1"."state"
FROM "person" AS "person_1"
JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#
When Get
refers to an unknown attribute, an error is reported.
q = Select(Get.person_id)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `person_id` in:
Select(Get.person_id)
=#
q = From(person) |>
As(:p) |>
Select(Get.q.person_id)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `q` in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> As(:p) |> Select(Get.q.person_id)
q2
end
=#
An attribute defined in a Join
shadows any previously defined attributes with the same name.
q = person |>
Join(person, true) |>
Select(Get.person_id)
print(render(q))
#=>
SELECT "person_2"."person_id"
FROM "person" AS "person_1"
CROSS JOIN "person" AS "person_2"
=#
An incomplete hierarchical reference, as well as an unexpected hierarchical reference, will result in an error.
q = person |>
As(:p) |>
Select(Get.p)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: incomplete reference `p` in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> As(:p) |> Select(Get.p)
q2
end
=#
q = person |>
Select(Get.person_id.year_of_birth)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: unexpected reference after `person_id` in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Select(Get.person_id.year_of_birth)
q2
end
=#
A reference bound to any node other than Get
will cause an error.
q = (qₚ = From(person)) |> Select(qₚ.person_id)
print(render(q))
#=>
ERROR: FunSQL.IllFormedError in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Select(q1.person_id)
q2
end
=#
Any expression could be given a name and attached to a query using the Define
constructor.
q = From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime)
#-> (…) |> Define(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age))
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."birth_datetime",
"person_1"."location_id",
(now() - "person_1"."birth_datetime") AS "age"
FROM "person" AS "person_1"
=#
This expression could be referred to by name as if it were a regular table attribute.
print(render(q |> Where(Get.age .> "16 years")))
#=>
SELECT
"person_2"."person_id",
"person_2"."gender_concept_id",
"person_2"."year_of_birth",
"person_2"."month_of_birth",
"person_2"."day_of_birth",
"person_2"."birth_datetime",
"person_2"."location_id",
"person_2"."age"
FROM (
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."birth_datetime",
"person_1"."location_id",
(now() - "person_1"."birth_datetime") AS "age"
FROM "person" AS "person_1"
) AS "person_2"
WHERE ("person_2"."age" > '16 years')
=#
A Define
node can be created using @funsql
notation.
q = @funsql from(person).define(age => 2000 - year_of_birth)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Define(Fun."-"(2000, Get.year_of_birth) |> As(:age))
q2
end
=#
Define
does not create a nested query if the definition is a literal or a simple reference.
q = From(person) |>
Define(:year => Get.year_of_birth,
:threshold => 2000) |>
Where(Get.year .>= Get.threshold)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."birth_datetime",
"person_1"."location_id",
"person_1"."year_of_birth" AS "year",
2000 AS "threshold"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" >= 2000)
=#
Define
can be used to override an existing field.
q = From(person) |>
Define(:person_id => Get.year_of_birth, :year_of_birth => Get.person_id)
print(render(q))
#=>
SELECT
"person_1"."year_of_birth" AS "person_id",
"person_1"."gender_concept_id",
"person_1"."person_id" AS "year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."birth_datetime",
"person_1"."location_id"
FROM "person" AS "person_1"
=#
Define
allows you to insert columns at the beginning or at the end of the column list.
q = From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
before = true)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |>
Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
Get.birth_datetime,
before = true)
q2
end
=#
print(render(q))
#=>
SELECT
(now() - "person_1"."birth_datetime") AS "age",
"person_1"."birth_datetime",
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."location_id"
FROM "person" AS "person_1"
=#
q = From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
after = true)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |>
Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
Get.birth_datetime,
after = true)
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."location_id",
(now() - "person_1"."birth_datetime") AS "age",
"person_1"."birth_datetime"
FROM "person" AS "person_1"
=#
It can also insert columns in front of or right after a specified column.
q = From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
before = :year_of_birth)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
(now() - "person_1"."birth_datetime") AS "age",
"person_1"."birth_datetime",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."location_id"
FROM "person" AS "person_1"
=#
q = From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime, Get.birth_datetime,
after = :birth_datetime)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
(now() - "person_1"."birth_datetime") AS "age",
"person_1"."birth_datetime",
"person_1"."location_id"
FROM "person" AS "person_1"
=#
It is an error to set both before
and after
or to refer to a non-existent column.
q = From(person) |>
Define(before = true, after = true)
print(render(q))
#=>
ERROR: DomainError with (before = true, after = true):
only one of `before` and `after` could be set
=#
q = Define(before = :person_id)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `person_id` in:
let q1 = Define(before = :person_id)
q1
end
=#
Define
has no effect if none of the defined fields are used in the query.
q = From(person) |>
Define(:age => 2020 .- Get.year_of_birth) |>
Select(Get.person_id, Get.year_of_birth)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."year_of_birth"
FROM "person" AS "person_1"
=#
Define
can be used after Select
.
q = From(person) |>
Select(Get.person_id, Get.year_of_birth) |>
Define(:age => 2020 .- Get.year_of_birth)
print(render(q))
#=>
SELECT
"person_2"."person_id",
"person_2"."year_of_birth",
(2020 - "person_2"."year_of_birth") AS "age"
FROM (
SELECT
"person_1"."person_id",
"person_1"."year_of_birth"
FROM "person" AS "person_1"
) AS "person_2"
=#
Define
requires that all definitions have a unique alias.
From(person) |>
Define(:age => Fun.now() .- Get.birth_datetime,
:age => Fun.current_timestamp() .- Get.birth_datetime)
#=>
ERROR: FunSQL.DuplicateLabelError: `age` is used more than once in:
Define(Fun."-"(Fun.now(), Get.birth_datetime) |> As(:age),
Fun."-"(Fun.current_timestamp(), Get.birth_datetime) |> As(:age))
=#
Variables
A query variable is created with the Var
constructor.
e = Var(:YEAR)
#-> Var.YEAR
Alternatively, use shorthand notation.
Var.YEAR
#-> Var.YEAR
Var."YEAR"
#-> Var.YEAR
Var[:YEAR]
#-> Var.YEAR
Var["YEAR"]
#-> Var.YEAR
A variable could be created with @funsql
notation.
@funsql :YEAR
#-> Var.YEAR
Unbound query variables are serialized as query parameters.
q = From(person) |>
Where(Get.year_of_birth .> Var.YEAR)
sql = render(q)
print(sql)
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > :YEAR)
=#
sql.vars
#-> [:YEAR]
Query variables could be bound using the Bind
constructor.
q0(person_id) =
From(visit_occurrence) |>
Where(Get.person_id .== Var.PERSON_ID) |>
Bind(:PERSON_ID => person_id)
q0(1)
#-> (…) |> Bind(…)
display(q0(1))
#=>
let visit_occurrence = SQLTable(:visit_occurrence, …),
q1 = From(visit_occurrence),
q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID))
q2 |> Bind(1 |> As(:PERSON_ID))
end
=#
print(render(q0(1)))
#=>
SELECT
"visit_occurrence_1"."visit_occurrence_id",
"visit_occurrence_1"."person_id",
"visit_occurrence_1"."visit_start_date",
"visit_occurrence_1"."visit_end_date"
FROM "visit_occurrence" AS "visit_occurrence_1"
WHERE ("visit_occurrence_1"."person_id" = 1)
=#
A Bind
node can be created with @funsql
notation.
q = @funsql begin
from(visit_occurrence)
filter(person_id == :PERSON_ID)
bind(:PERSON_ID => person_id)
end
display(q)
#=>
let q1 = From(:visit_occurrence),
q2 = q1 |> Where(Fun."="(Get.person_id, Var.PERSON_ID))
q2 |> Bind(Get.person_id |> As(:PERSON_ID))
end
=#
Bind
lets us create correlated subqueries.
q = From(person) |>
Where(Fun.exists(q0(Get.person_id)))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE (EXISTS (
SELECT NULL AS "_"
FROM "visit_occurrence" AS "visit_occurrence_1"
WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id")
))
=#
When an argument to Bind
is an aggregate, it must be evaluated in a nested subquery.
q0(person_id, date) =
From(observation) |>
Where(Fun.and(Get.person_id .== Var.PERSON_ID,
Get.observation_date .>= Var.DATE)) |>
Bind(:PERSON_ID => person_id, :DATE => date)
q = From(visit_occurrence) |>
Group(Get.person_id) |>
Where(Fun.exists(q0(Get.person_id, Agg.max(Get.visit_start_date))))
print(render(q))
#=>
SELECT "visit_occurrence_2"."person_id"
FROM (
SELECT
"visit_occurrence_1"."person_id",
max("visit_occurrence_1"."visit_start_date") AS "max"
FROM "visit_occurrence" AS "visit_occurrence_1"
GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_occurrence_2"
WHERE (EXISTS (
SELECT NULL AS "_"
FROM "observation" AS "observation_1"
WHERE
("observation_1"."person_id" = "visit_occurrence_2"."person_id") AND
("observation_1"."observation_date" >= "visit_occurrence_2"."max")
))
=#
An empty Bind
can be created.
Bind(args = [])
#-> Bind(args = [])
Bind
requires that all variables have a unique name.
Bind(:PERSON_ID => 1, :PERSON_ID => 2)
#=>
ERROR: FunSQL.DuplicateLabelError: `PERSON_ID` is used more than once in:
Bind(1 |> As(:PERSON_ID), 2 |> As(:PERSON_ID))
=#
Functions and Operators
A function or an operator invocation is created with the Fun
constructor.
Fun.">"
#-> Fun.:(">")
e = Fun.">"(Get.year_of_birth, 2000)
#-> Fun.:(">")(…)
display(e)
#-> Fun.">"(Get.year_of_birth, 2000)
Alternatively, Fun
nodes are created by broadcasting. Common Julia operators are replaced with their SQL equivalents.
#? VERSION >= v"1.7"
e = Get.location.state .== "IL" .|| Get.location.zip .!= "60615"
#-> Fun.or(…)
#? VERSION >= v"1.7"
display(e)
#-> Fun.or(Fun."="(Get.location.state, "IL"), Fun."<>"(Get.location.zip, "60615"))
#? VERSION >= v"1.7"
e = .!(e .&& Get.year_of_birth .> 1950 .&& Get.year_of_birth .< 1990)
#-> Fun.not(…)
#? VERSION >= v"1.7"
display(e)
#=>
Fun.not(Fun.and(Fun.or(Fun."="(Get.location.state, "IL"),
Fun."<>"(Get.location.zip, "60615")),
Fun.and(Fun.">"(Get.year_of_birth, 1950),
Fun."<"(Get.year_of_birth, 1990))))
=#
A vector of arguments could be passed directly.
Fun.">"(args = SQLNode[Get.year_of_birth, 2000])
#-> Fun.:(">")(…)
Fun
nodes can be generated in @funsql
notation.
e = @funsql fun(>, year_of_birth, 2000)
display(e)
#-> Fun.">"(Get.year_of_birth, 2000)
In order to generate Fun
nodes using regular function and operator calls, we need to declare these functions and operators in advance.
e = @funsql concat(location.city, ", ", location.state)
display(e)
#-> Fun.concat(Get.location.city, ", ", Get.location.state)
e = @funsql 1950 < year_of_birth < 1990
display(e)
#-> Fun.and(Fun."<"(1950, Get.year_of_birth), Fun."<"(Get.year_of_birth, 1990))
e = @funsql location.state != "IL" || location.zip != 60615
display(e)
#-> Fun.or(Fun."<>"(Get.location.state, "IL"), Fun."<>"(Get.location.zip, 60615))
e = @funsql location.state == "IL" && location.zip == 60615
display(e)
#-> Fun.and(Fun."="(Get.location.state, "IL"), Fun."="(Get.location.zip, 60615))
In @funsql
notation, use backticks to represent a name that is not a valid identifier.
e = @funsql fun(`SUBSTRING(? FROM ? FOR ?)`, city, 1, 1)
display(e)
#-> Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city, 1, 1)
q = @funsql `from`(person).`filter`(year_of_birth <= 1964)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun."<="(Get.year_of_birth, 1964))
q2
end
=#
In @funsql
notation, an if
statement is converted to a CASE
expression.
e = @funsql year_of_birth <= 1964 ? "Boomers" : "Millenials"
display(e)
#-> Fun.case(Fun."<="(Get.year_of_birth, 1964), "Boomers", "Millenials")
e = @funsql year_of_birth <= 1964 ? "Boomers" :
year_of_birth <= 1980 ? "Generation X" : "Millenials"
display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
"Boomers",
Fun."<="(Get.year_of_birth, 1980),
"Generation X",
"Millenials")
=#
e = @funsql if year_of_birth <= 1964; "Boomers"; end
display(e)
#-> Fun.case(Fun."<="(Get.year_of_birth, 1964), "Boomers")
e = @funsql begin
if year_of_birth <= 1964
"Boomers"
elseif year_of_birth <= 1980
"Generation X"
end
end
display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
"Boomers",
Fun."<="(Get.year_of_birth, 1980),
"Generation X")
=#
e = @funsql begin
if year_of_birth <= 1964
"Boomers"
elseif year_of_birth <= 1980
"Generation X"
elseif year_of_birth <= 1996
"Millenials"
else
"Generation Z"
end
end
display(e)
#=>
Fun.case(Fun."<="(Get.year_of_birth, 1964),
"Boomers",
Fun."<="(Get.year_of_birth, 1980),
"Generation X",
Fun."<="(Get.year_of_birth, 1996),
"Millenials",
"Generation Z")
=#
In a SELECT
clause, the function name becomes the column alias.
q = From(location) |>
Select(Fun.coalesce(Get.city, "N/A"))
print(render(q))
#=>
SELECT coalesce("location_1"."city", 'N/A') AS "coalesce"
FROM "location" AS "location_1"
=#
When the name contains only symbol characters, or when it starts or ends with a space character, it is interpreted as an operator.
q = From(location) |>
Select(Fun." || "(Get.city, ", ", Get.state))
print(render(q))
#=>
SELECT ("location_1"."city" || ', ' || "location_1"."state") AS "_"
FROM "location" AS "location_1"
=#
The function name containing ?
serves as a template.
q = From(location) |>
Select(Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city, 1, 1))
print(render(q))
#=>
SELECT SUBSTRING("location_1"."city" FROM 1 FOR 1) AS "_"
FROM "location" AS "location_1"
=#
The number of arguments to a function must coincide with the number of placeholders in the template.
Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city)
#=>
ERROR: FunSQL.InvalidArityError: `SUBSTRING(? FROM ? FOR ?)` expects 3 arguments, got 1 in:
Fun."SUBSTRING(? FROM ? FOR ?)"(Get.city)
=#
Some common functions also validate the number of arguments.
Fun.case()
#=>
ERROR: FunSQL.InvalidArityError: `case` expects at least 2 arguments, got 0 in:
Fun.case()
=#
Fun.is_null(Get.city, Get.state)
#=>
ERROR: FunSQL.InvalidArityError: `is_null` expects 1 argument, got 2 in:
Fun.is_null(Get.city, Get.state)
=#
Fun.count(Get.city, Get.state)
#=>
ERROR: FunSQL.InvalidArityError: `count` expects from 0 to 1 argument, got 2 in:
Fun.count(Get.city, Get.state)
=#
A function invocation may include a nested query.
p = From(person) |>
Where(Get.year_of_birth .> 1950)
q = Select(Fun.exists(p))
print(render(q))
#=>
SELECT (EXISTS (
SELECT NULL AS "_"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1950)
)) AS "exists"
=#
p = From(concept) |>
Where(Fun.and(Get.vocabulary_id .== "Gender",
Get.concept_code .== "F")) |>
Select(Get.concept_id)
q = From(person) |>
Where(Fun.in(Get.gender_concept_id, p))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" IN (
SELECT "concept_1"."concept_id"
FROM "concept" AS "concept_1"
WHERE
("concept_1"."vocabulary_id" = 'Gender') AND
("concept_1"."concept_code" = 'F')
))
=#
FunSQL can simplify logical expressions.
q = From(person) |>
Where(Fun.and())
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
q = From(person) |>
Select(Get.person_id) |>
Where(Fun.and())
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#
q = From(person) |>
Where(Fun.and(Get.year_of_birth .> 1950))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1950)
=#
q = From(person) |>
Where(foldl(Fun.and, [Get.year_of_birth .> 1950, Get.year_of_birth .< 1960, Get.year_of_birth .!= 1955], init = Fun.and()))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE
("person_1"."year_of_birth" > 1950) AND
("person_1"."year_of_birth" < 1960) AND
("person_1"."year_of_birth" <> 1955)
=#
q = From(person) |>
Where(Fun.or())
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE FALSE
=#
q = From(person) |>
Where(Fun.or(Get.year_of_birth .> 1950))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1950)
=#
q = From(person) |>
Where(Fun.or(Fun.or(Fun.or(), Get.year_of_birth .> 1950), Get.year_of_birth .< 1960))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE
("person_1"."year_of_birth" > 1950) OR
("person_1"."year_of_birth" < 1960)
=#
#? VERSION >= v"1.7"
q = From(person) |>
Where(Get.year_of_birth .> 1950 .|| Get.year_of_birth .< 1960 .|| Get.year_of_birth .!= 1955)
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE
("person_1"."year_of_birth" > 1950) OR
("person_1"."year_of_birth" < 1960) OR
("person_1"."year_of_birth" <> 1955)
=#
q = From(person) |>
Where(Fun.not(false))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
Append
The Append
constructor creates a subquery that concatenates the output of multiple queries.
q = From(measurement) |>
Define(:date => Get.measurement_date) |>
Append(From(observation) |>
Define(:date => Get.observation_date))
#-> (…) |> Append(…)
display(q)
#=>
let measurement = SQLTable(:measurement, …),
observation = SQLTable(:observation, …),
q1 = From(measurement),
q2 = q1 |> Define(Get.measurement_date |> As(:date)),
q3 = From(observation),
q4 = q3 |> Define(Get.observation_date |> As(:date)),
q5 = q2 |> Append(q4)
q5
end
=#
print(render(q |> Select(Get.person_id, Get.date)))
#=>
SELECT
"union_1"."person_id",
"union_1"."date"
FROM (
SELECT
"measurement_1"."person_id",
"measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT
"observation_1"."person_id",
"observation_1"."observation_date" AS "date"
FROM "observation" AS "observation_1"
) AS "union_1"
=#
Append
can also be specified without the over
node.
q = Append(From(measurement) |>
Define(:date => Get.measurement_date),
From(observation) |>
Define(:date => Get.observation_date)) |>
Select(Get.person_id, Get.date)
print(render(q))
#=>
SELECT
"union_1"."person_id",
"union_1"."date"
FROM (
SELECT
"measurement_1"."person_id",
"measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT
"observation_1"."person_id",
"observation_1"."observation_date" AS "date"
FROM "observation" AS "observation_1"
) AS "union_1"
=#
An Append
node can be created using @funsql
notation.
q = @funsql begin
from(measurement).define(date => measurement_date)
append(from(observation).define(date => observation_date))
end
display(q)
#=>
let q1 = From(:measurement),
q2 = q1 |> Define(Get.measurement_date |> As(:date)),
q3 = From(:observation),
q4 = q3 |> Define(Get.observation_date |> As(:date)),
q5 = q2 |> Append(q4)
q5
end
=#
Append
will automatically assign unique aliases to the exported columns.
q = From(measurement) |>
Define(:concept_id => Get.measurement_concept_id) |>
Group(Get.person_id) |>
Define(:count => 1, :count_2 => 2) |>
Append(From(observation) |>
Define(:concept_id => Get.observation_concept_id) |>
Group(Get.person_id) |>
Define(:count => 10, :count_2 => 20)) |>
Select(Get.person_id, :agg_count => Agg.count(), Get.count_2, Get.count)
print(render(q))
#=>
SELECT
"union_1"."person_id",
"union_1"."count" AS "agg_count",
"union_1"."count_2",
"union_1"."count_3" AS "count"
FROM (
SELECT
"measurement_1"."person_id",
count(*) AS "count",
2 AS "count_2",
1 AS "count_3"
FROM "measurement" AS "measurement_1"
GROUP BY "measurement_1"."person_id"
UNION ALL
SELECT
"observation_1"."person_id",
count(*) AS "count",
20 AS "count_2",
10 AS "count_3"
FROM "observation" AS "observation_1"
GROUP BY "observation_1"."person_id"
) AS "union_1"
=#
Append
will not put duplicate expressions into the SELECT
clauses of the nested subqueries.
q = From(person) |>
Join(From(measurement) |>
Define(:date => Get.measurement_date) |>
Append(From(observation) |>
Define(:date => Get.observation_date)) |>
As(:assessment),
on = Get.person_id .== Get.assessment.person_id) |>
Where(Get.assessment.date .> Fun.current_timestamp()) |>
Select(Get.person_id, Get.assessment.date)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"assessment_1"."date"
FROM "person" AS "person_1"
JOIN (
SELECT
"measurement_1"."measurement_date" AS "date",
"measurement_1"."person_id"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT
"observation_1"."observation_date" AS "date",
"observation_1"."person_id"
FROM "observation" AS "observation_1"
) AS "assessment_1" ON ("person_1"."person_id" = "assessment_1"."person_id")
WHERE ("assessment_1"."date" > CURRENT_TIMESTAMP)
=#
q = From(measurement) |>
Define(:date => Get.measurement_date) |>
Append(From(observation) |>
Define(:date => Get.observation_date)) |>
Group(Get.date) |>
Define(Agg.count())
print(render(q))
#=>
SELECT
"union_1"."date",
count(*) AS "count"
FROM (
SELECT "measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT "observation_1"."observation_date" AS "date"
FROM "observation" AS "observation_1"
) AS "union_1"
GROUP BY "union_1"."date"
=#
Append
aligns the columns of its subqueries.
q = From(measurement) |>
Select(Get.person_id, :date => Get.measurement_date) |>
Append(From(observation) |>
Select(:date => Get.observation_date, Get.person_id))
print(render(q))
#=>
SELECT
"measurement_1"."person_id",
"measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT
"observation_2"."person_id",
"observation_2"."date"
FROM (
SELECT
"observation_1"."observation_date" AS "date",
"observation_1"."person_id"
FROM "observation" AS "observation_1"
) AS "observation_2"
=#
Arguments of Append
may contain ORDER BY
or LIMIT
clauses, which must be wrapped in a nested subquery.
q = From(measurement) |>
Order(Get.measurement_date) |>
Select(Get.person_id, :date => Get.measurement_date) |>
Append(From(observation) |>
Define(:date => Get.observation_date) |>
Limit(1))
print(render(q))
#=>
SELECT
"measurement_2"."person_id",
"measurement_2"."date"
FROM (
SELECT
"measurement_1"."person_id",
"measurement_1"."measurement_date" AS "date"
FROM "measurement" AS "measurement_1"
ORDER BY "measurement_1"."measurement_date"
) AS "measurement_2"
UNION ALL
SELECT
"observation_2"."person_id",
"observation_2"."date"
FROM (
SELECT
"observation_1"."person_id",
"observation_1"."observation_date" AS "date"
FROM "observation" AS "observation_1"
FETCH FIRST 1 ROW ONLY
) AS "observation_2"
=#
An Append
without any queries can be created explicitly.
q = Append(args = [])
#-> Append(args = [])
print(render(q))
#=>
SELECT NULL AS "_"
WHERE FALSE
=#
Without an explicit Select
, the output of Append
includes the common columns of the nested queries.
q = Append(measurement, observation)
print(render(q))
#=>
SELECT "measurement_1"."person_id"
FROM "measurement" AS "measurement_1"
UNION ALL
SELECT "observation_1"."person_id"
FROM "observation" AS "observation_1"
=#
Iterate
The Iterate
constructor creates an iteration query. In the argument of Iterate
, the From(^)
node refers to the output of the previous iteration. We could use Iterate
and From(^)
to create a factorial table.
q = Define(:n => 1, :f => 1) |>
Iterate(From(^) |>
Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
Where(Get.n .<= 10))
#-> (…) |> Iterate(…)
display(q)
#=>
let q1 = Define(1 |> As(:n), 1 |> As(:f)),
q2 = From(^),
q3 = q2 |>
Define(Fun."+"(Get.n, 1) |> As(:n),
Fun."*"(Get.f, Fun."+"(Get.n, 1)) |> As(:f)),
q4 = q3 |> Where(Fun."<="(Get.n, 10)),
q5 = q1 |> Iterate(q4)
q5
end
=#
print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
SELECT
1 AS "n",
1 AS "f"
UNION ALL
SELECT
"__3"."n",
"__3"."f"
FROM (
SELECT
("__2"."n" + 1) AS "n",
("__2"."f" * ("__2"."n" + 1)) AS "f"
FROM "__1" AS "__2"
) AS "__3"
WHERE ("__3"."n" <= 10)
)
SELECT
"__4"."n",
"__4"."f"
FROM "__1" AS "__4"
=#
An Iterate
node can be created using @funsql
notation.
q = @funsql begin
define(n => 1, f => 1)
iterate(define(n => n + 1, f => f * (n + 1)).filter(n <= 10))
end
display(q)
#=>
let q1 = Define(1 |> As(:n), 1 |> As(:f)),
q2 = Define(Fun."+"(Get.n, 1) |> As(:n),
Fun."*"(Get.f, Fun."+"(Get.n, 1)) |> As(:f)),
q3 = q2 |> Where(Fun."<="(Get.n, 10)),
q4 = q1 |> Iterate(q3)
q4
end
=#
The From(^)
node in front of the iterator query can be omitted.
q = Define(:n => 1, :f => 1) |>
Iterate(Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
Where(Get.n .<= 10))
print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
SELECT
1 AS "n",
1 AS "f"
UNION ALL
SELECT
"__3"."n",
"__3"."f"
FROM (
SELECT
("__2"."n" + 1) AS "n",
("__2"."f" * ("__2"."n" + 1)) AS "f"
FROM "__1" AS "__2"
) AS "__3"
WHERE ("__3"."n" <= 10)
)
SELECT
"__4"."n",
"__4"."f"
FROM "__1" AS "__4"
=#
An Iterate
node may use a CTE.
q = Define(:n => 1, :f => 1) |>
Iterate(Define(:n => Get.n .+ 1, :f => Get.f .* (Get.n .+ 1)) |>
CrossJoin(From(:threshold)) |>
Where(Get.n .<= Get.threshold)) |>
With(:threshold => Define(:threshold => 10))
print(render(q))
#=>
WITH RECURSIVE "threshold_1" ("threshold") AS (
SELECT 10 AS "threshold"
),
"__1" ("n", "f") AS (
SELECT
1 AS "n",
1 AS "f"
UNION ALL
SELECT
"__3"."n",
"__3"."f"
FROM (
SELECT
("__2"."n" + 1) AS "n",
("__2"."f" * ("__2"."n" + 1)) AS "f",
"threshold_2"."threshold"
FROM "__1" AS "__2"
CROSS JOIN "threshold_1" AS "threshold_2"
) AS "__3"
WHERE ("__3"."n" <= "__3"."threshold")
)
SELECT
"__4"."n",
"__4"."f"
FROM "__1" AS "__4"
=#
It is an error to use From(^)
outside of Iterate
.
q = From(^)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: self-reference outside of Iterate in:
let q1 = From(^)
q1
end
=#
The set of columns produced by Iterate
is the intersection of the columns produced by the base query and the iterator query.
q = Define(:k => 0, :m => 0) |>
Iterate(As(:previous) |>
Where(Get.previous.m .< 10) |>
Define(:m => Get.previous.m .+ 1, :n => 0))
print(render(q))
#=>
WITH RECURSIVE "previous_1" ("m") AS (
SELECT 0 AS "m"
UNION ALL
SELECT ("previous_2"."m" + 1) AS "m"
FROM "previous_1" AS "previous_2"
WHERE ("previous_2"."m" < 10)
)
SELECT "previous_3"."m"
FROM "previous_1" AS "previous_3"
=#
Iterate
aligns the columns of its subqueries.
q = Select(:n => 1, :f => 1) |>
Iterate(Where(Get.n .< 10) |>
Select(:f => (Get.n .+ 1) .* Get.f,
:n => Get.n .+ 1))
print(render(q))
#=>
WITH RECURSIVE "__1" ("n", "f") AS (
SELECT
1 AS "n",
1 AS "f"
UNION ALL
SELECT
"__3"."n",
"__3"."f"
FROM (
SELECT
(("__2"."n" + 1) * "__2"."f") AS "f",
("__2"."n" + 1) AS "n"
FROM "__1" AS "__2"
WHERE ("__2"."n" < 10)
) AS "__3"
)
SELECT
"__4"."n",
"__4"."f"
FROM "__1" AS "__4"
=#
As
An alias to an expression can be added with the As
constructor.
e = 42 |> As(:integer)
#-> (…) |> As(:integer)
display(e)
#-> 42 |> As(:integer)
print(render(Select(e)))
#=>
SELECT 42 AS "integer"
=#
As
node can be created with @funsql
.
e = @funsql (42).as(integer)
display(e)
#-> 42 |> As(:integer)
The =>
shorthand is supported by @funsql
.
e = @funsql integer => 42
display(e)
#-> :integer => 42
As
is also used to create an alias for a subquery.
q = From(person) |>
As(:p) |>
Select(Get.p.person_id)
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#
As
blocks the default output columns.
q = From(person) |> As(:p)
print(render(q))
#=>
SELECT NULL AS "_"
FROM "person" AS "person_1"
=#
From
The From
constructor creates a subquery that selects columns from the given table.
q = From(person)
#-> From(…)
display(q)
#-> From(SQLTable(:person, …))
By default, From
selects all columns from the table.
print(render(q))
#=>
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth",
"person_1"."month_of_birth",
"person_1"."day_of_birth",
"person_1"."birth_datetime",
"person_1"."location_id"
FROM "person" AS "person_1"
=#
From
adds the schema qualifier when the table has the schema.
const pg_database =
SQLTable(qualifiers = [:pg_catalog], :pg_database, columns = [:oid, :datname])
q = From(pg_database)
print(render(q))
#=>
SELECT
"pg_database_1"."oid",
"pg_database_1"."datname"
FROM "pg_catalog"."pg_database" AS "pg_database_1"
=#
In a suitable context, a SQLTable
object is automatically converted to a From
subquery.
print(render(person))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
From
and other subqueries generate a correct SELECT
clause when the table has no columns.
empty = SQLTable(:empty, columns = Symbol[])
q = From(empty) |>
Where(false) |>
Select(args = [])
display(q)
#=>
let empty = SQLTable(:empty, …),
q1 = From(empty),
q2 = q1 |> Where(false),
q3 = q2 |> Select(args = [])
q3
end
=#
print(render(q))
#=>
SELECT NULL AS "_"
FROM "empty" AS "empty_1"
WHERE FALSE
=#
When From
takes a Tables-compatible argument, it generates a VALUES
query.
using DataFrames
df = DataFrame(name = ["SQL", "Julia", "FunSQL"],
year = [1974, 2012, 2021])
q = From(df)
#-> From(…)
display(q)
#-> From((name = ["SQL", …], year = [1974, …]))
print(render(q))
#=>
SELECT
"values_1"."name",
"values_1"."year"
FROM (
VALUES
('SQL', 1974),
('Julia', 2012),
('FunSQL', 2021)
) AS "values_1" ("name", "year")
=#
SQLite does not support column aliases with AS
clause.
print(render(q, dialect = :sqlite))
#=>
SELECT
"values_1"."column1" AS "name",
"values_1"."column2" AS "year"
FROM (
VALUES
('SQL', 1974),
('Julia', 2012),
('FunSQL', 2021)
) AS "values_1"
=#
Only columns that are used in the query will be serialized.
q = From(df) |>
Select(Get.name)
print(render(q))
#=>
SELECT "values_1"."name"
FROM (
VALUES
('SQL'),
('Julia'),
('FunSQL')
) AS "values_1" ("name")
=#
A column of NULLs will be added if no actual columns are used.
q = From(df) |>
Group() |>
Select(Agg.count())
print(render(q))
#=>
SELECT count(*) AS "count"
FROM (
VALUES
(NULL),
(NULL),
(NULL)
) AS "values_1" ("_")
=#
Since VALUES
clause requires at least one row of data, a different representation is used when the source table is empty.
q = From(df[1:0, :])
print(render(q))
#=>
SELECT
NULL AS "name",
NULL AS "year"
WHERE FALSE
=#
The source table must have at least one column.
q = From(df[1:0, 1:0])
#=>
ERROR: DomainError with 0×0 DataFrame:
a table with at least one column is expected
=#
From
can accept a table-valued function. Since the output type of the function is not known to FunSQL, you must manually specify the names of the output columns.
q = From(Fun.generate_series(0, 100, 10), columns = [:value])
#-> From(…, columns = [:value])
display(q)
#-> From(Fun.generate_series(0, 100, 10), columns = [:value])
print(render(q))
#=>
SELECT "generate_series_1"."value"
FROM generate_series(0, 100, 10) AS "generate_series_1" ("value")
=#
WITH ORDINALITY
annotation adds an extra column that enumerates the output rows.
q = From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
columns = [:value, :index])
print(render(q))
#=>
SELECT
"__1"."value",
"__1"."index"
FROM generate_series(0, 100, 10) WITH ORDINALITY AS "__1" ("value", "index")
=#
A From
node can be created with @funsql
notation.
q = @funsql from(person)
display(q)
#-> From(:person)
q = @funsql from(nothing)
display(q)
#-> From(nothing)
q = @funsql from(^)
display(q)
#-> From(^)
q = @funsql from($person)
display(q)
#-> From(SQLTable(:person, …))
q = @funsql from($df)
display(q)
#-> From((name = ["SQL", …], year = [1974, …]))
funsql_generate_series = FunSQL.FunClosure(:generate_series)
q = @funsql from(generate_series(0, 100, 10), columns = [value])
display(q)
#-> From(Fun.generate_series(0, 100, 10), columns = [:value])
When From
with a tabular function is attached to the right branch of a Join
node, the function may use data from the left branch of Join
, even without being wrapped in a Bind
node.
q = From(Fun.regexp_split_to_table("(10,20)-(30,40)-(50,60)", "-"),
columns = [:point]) |>
CrossJoin(From(Fun.regexp_matches(Get.point, "(\\d+),(\\d+)"),
columns = [:captures])) |>
Select(:x => Fun."CAST(?[1] AS INTEGER)"(Get.captures),
:y => Fun."CAST(?[2] AS INTEGER)"(Get.captures))
print(render(q))
#=>
SELECT
CAST("regexp_matches_1"."captures"[1] AS INTEGER) AS "x",
CAST("regexp_matches_1"."captures"[2] AS INTEGER) AS "y"
FROM regexp_split_to_table('(10,20)-(30,40)-(50,60)', '-') AS "regexp_split_to_table_1" ("point")
CROSS JOIN regexp_matches("regexp_split_to_table_1"."point", '(\d+),(\d+)') AS "regexp_matches_1" ("captures")
=#
All the columns of a tabular function must have distinct names.
From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
columns = [:index, :index])
#=>
ERROR: FunSQL.DuplicateLabelError: `index` is used more than once in:
let q1 = From(Fun."? WITH ORDINALITY"(Fun.generate_series(0, 100, 10)),
columns = [:index, :index])
q1
end
=#
From(nothing)
will generate a unit dataset with one row.
q = From(nothing)
display(q)
#-> From(nothing)
print(render(q))
#=>
SELECT NULL AS "_"
=#
With
, Over
, and WithExternal
We can create a temporary dataset using With
and refer to it with From
.
q = From(:male) |>
With(From(person) |>
Where(Get.gender_concept_id .== 8507) |>
As(:male))
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(:male),
q2 = From(person),
q3 = q2 |> Where(Fun."="(Get.gender_concept_id, 8507)),
q4 = q1 |> With(q3 |> As(:male))
q4
end
=#
print(render(q))
#=>
WITH "male_1" ("person_id", …, "location_id") AS (
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" = 8507)
)
SELECT
"male_2"."person_id",
⋮
"male_2"."location_id"
FROM "male_1" AS "male_2"
=#
With
definitions can be annotated as materialized or not materialized:
q = From(:male) |>
With(From(person) |>
Where(Get.gender_concept_id .== 8507) |>
As(:male),
materialized = true)
#-> (…) |> With(…, materialized = true)
print(render(q))
#=>
WITH "male_1" ( … ) AS MATERIALIZED (
⋮
)
SELECT
⋮
FROM "male_1" AS "male_2"
=#
q = From(:male) |>
With(From(person) |>
Where(Get.gender_concept_id .== 8507) |>
As(:male),
materialized = false)
print(render(q))
#=>
WITH "male_1" ( … ) AS NOT MATERIALIZED (
⋮
)
SELECT
⋮
FROM "male_1" AS "male_2"
=#
With
can take more than one definition.
q = Select(:male_count => From(:male) |> Group() |> Select(Agg.count()),
:female_count => From(:female) |> Group() |> Select(Agg.count())) |>
With(:male => From(person) |> Where(Get.gender_concept_id .== 8507),
:female => From(person) |> Where(Get.gender_concept_id .== 8532))
print(render(q))
#=>
WITH "male_1" ("_") AS (
SELECT NULL AS "_"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" = 8507)
),
"female_1" ("_") AS (
SELECT NULL AS "_"
FROM "person" AS "person_2"
WHERE ("person_2"."gender_concept_id" = 8532)
)
SELECT
(
SELECT count(*) AS "count"
FROM "male_1" AS "male_2"
) AS "male_count",
(
SELECT count(*) AS "count"
FROM "female_1" AS "female_2"
) AS "female_count"
=#
With
can shadow the previous With
definition.
q = From(:cohort) |>
With(:cohort => From(:cohort) |> Where(Get.gender_concept_id .== 8507)) |>
With(:cohort => From(:cohort) |> Where(Get.year_of_birth .>= 1950)) |>
With(:cohort => From(person)) |>
Select(Get.person_id)
print(render(q))
#=>
WITH "cohort_1" ("person_id", "gender_concept_id", "year_of_birth") AS (
SELECT
"person_1"."person_id",
"person_1"."gender_concept_id",
"person_1"."year_of_birth"
FROM "person" AS "person_1"
),
"cohort_3" ("person_id", "gender_concept_id") AS (
SELECT
"cohort_2"."person_id",
"cohort_2"."gender_concept_id"
FROM "cohort_1" AS "cohort_2"
WHERE ("cohort_2"."year_of_birth" >= 1950)
),
"cohort_5" ("person_id") AS (
SELECT "cohort_4"."person_id"
FROM "cohort_3" AS "cohort_4"
WHERE ("cohort_4"."gender_concept_id" = 8507)
)
SELECT "cohort_6"."person_id"
FROM "cohort_5" AS "cohort_6"
=#
A With
node can be created using @funsql
.
q = @funsql begin
from(male)
with(male => from(person).filter(gender_concept_id == 8507),
materialized = false)
end
display(q)
#=>
let q1 = From(:male),
q2 = From(:person),
q3 = q2 |> Where(Fun."="(Get.gender_concept_id, 8507)),
q4 = q1 |> With(q3 |> As(:male), materialized = false)
q4
end
=#
A dataset defined by With
must have an explicit label assigned to it.
q = From(:person) |>
With(From(person))
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: table reference `person` requires As in:
let person = SQLTable(:person, …),
q1 = From(:person),
q2 = From(person),
q3 = q1 |> With(q2)
q3
end
=#
Datasets defined by With
must have a unique label.
From(:p) |>
With(:p => From(person),
:p => From(person))
#=>
ERROR: FunSQL.DuplicateLabelError: `p` is used more than once in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = From(person),
q3 = With(q1 |> As(:p), q2 |> As(:p))
q3
end
=#
It is an error for From
to refer to an undefined dataset.
q = From(:p)
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: cannot find `p` in:
let q1 = From(:p)
q1
end
=#
A variant of With
called Over
exchanges the positions of the definition and the query that uses it.
q = From(person) |>
Where(Get.gender_concept_id .== 8507) |>
As(:male) |>
Over(From(:male))
#-> (…) |> Over(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Where(Fun."="(Get.gender_concept_id, 8507)),
q3 = From(:male),
q4 = q2 |> As(:male) |> Over(q3)
q4
end
=#
print(render(q))
#=>
WITH "male_1" ("person_id", …, "location_id") AS (
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" = 8507)
)
SELECT
"male_2"."person_id",
⋮
"male_2"."location_id"
FROM "male_1" AS "male_2"
=#
An Over
node can be created using @funsql
.
q = @funsql begin
male => from(person).filter(gender_concept_id == 8507)
over(from(male), materialized = true)
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun."="(Get.gender_concept_id, 8507)),
q3 = From(:male),
q4 = q2 |> As(:male) |> Over(q3, materialized = true)
q4
end
=#
A variant of With
called WithExternal
can be used to prepare a definition for a CREATE TABLE AS
or SELECT INTO
statement.
with_external_handler((tbl, def)) =
println("CREATE TEMP TABLE ",
render(ID(tbl.qualifiers, tbl.name)),
" (", join([render(ID(c.name)) for (n, c) in tbl.columns], ", "), ") AS\n",
render(def), ";\n")
q = From(:male) |>
WithExternal(From(person) |>
Where(Get.gender_concept_id .== 8507) |>
As(:male),
qualifiers = [:tmp],
handler = with_external_handler)
#-> (…) |> WithExternal(…, qualifiers = [:tmp], handler = with_external_handler)
print(render(q))
#=>
CREATE TEMP TABLE "tmp"."male" ("person_id", …, "location_id") AS
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."gender_concept_id" = 8507);
SELECT
"male_1"."person_id",
⋮
"male_1"."location_id"
FROM "tmp"."male" AS "male_1"
=#
Datasets defined by WithExternal
must have a unique label.
From(:p) |>
WithExternal(:p => From(person),
:p => From(person))
#=>
ERROR: FunSQL.DuplicateLabelError: `p` is used more than once in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = From(person),
q3 = WithExternal(q1 |> As(:p), q2 |> As(:p))
q3
end
=#
Group
The Group
constructor creates a subquery that summarizes the rows partitioned by the given keys.
q = From(person) |>
Group(Get.year_of_birth)
#-> (…) |> Group(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.year_of_birth)
q2
end
=#
print(render(q))
#=>
SELECT DISTINCT "person_1"."year_of_birth"
FROM "person" AS "person_1"
=#
A Group
node can be created using @funsql
notation.
q = @funsql from(person).group(year_of_birth)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Group(Get.year_of_birth)
q2
end
=#
Partitions created by Group
are summarized using aggregate expressions.
Agg.count
#-> Agg.count
q = From(person) |>
Group(Get.year_of_birth) |>
Select(Get.year_of_birth, Agg.count())
print(render(q))
#=>
SELECT
"person_1"."year_of_birth",
count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#
Aggregate functions can be created with @funsql
.
e = @funsql agg(min, year_of_birth)
display(e)
#-> Agg.min(Get.year_of_birth)
e = @funsql min(year_of_birth)
display(e)
#-> Agg.min(Get.year_of_birth)
e = @funsql count(filter = year_of_birth > 1950)
display(e)
#-> Agg.count(filter = Fun.">"(Get.year_of_birth, 1950))
e = @funsql visit_group.count()
display(e)
#-> Get.visit_group |> Agg.count()
e = @funsql `count`()
display(e)
#-> Agg.count()
e = @funsql visit_group.`count`()
display(e)
#-> Get.visit_group |> Agg.count()
e = @funsql `visit_group`.`count`()
display(e)
#-> Get.visit_group |> Agg.count()
Group
will create a single instance of an aggregate function even if it is used more than once.
q = From(person) |>
Join(:visit_group => From(visit_occurrence) |>
Group(Get.person_id),
on = Get.person_id .== Get.visit_group.person_id) |>
Where(Agg.count(over = Get.visit_group) .>= 2) |>
Select(Get.person_id, Agg.count(over = Get.visit_group))
print(render(q))
#=>
SELECT
"person_1"."person_id",
"visit_group_1"."count"
FROM "person" AS "person_1"
JOIN (
SELECT
count(*) AS "count",
"visit_occurrence_1"."person_id"
FROM "visit_occurrence" AS "visit_occurrence_1"
GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_group_1" ON ("person_1"."person_id" = "visit_group_1"."person_id")
WHERE ("visit_group_1"."count" >= 2)
=#
Group
creates a nested subquery when this is necessary to avoid duplicating the group key expression.
q = From(person) |>
Group(:age => 2000 .- Get.year_of_birth)
print(render(q))
#=>
SELECT DISTINCT (2000 - "person_1"."year_of_birth") AS "age"
FROM "person" AS "person_1"
=#
q = From(person) |>
Group(:age => 2000 .- Get.year_of_birth) |>
Select(Agg.count())
print(render(q))
#=>
SELECT count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY (2000 - "person_1"."year_of_birth")
=#
q = From(person) |>
Group(:age => 2000 .- Get.year_of_birth) |>
Define(Agg.count())
print(render(q))
#=>
SELECT
"person_2"."age",
count(*) AS "count"
FROM (
SELECT (2000 - "person_1"."year_of_birth") AS "age"
FROM "person" AS "person_1"
) AS "person_2"
GROUP BY "person_2"."age"
=#
Group
could be used consequently.
q = From(measurement) |>
Group(Get.measurement_concept_id) |>
Group(Agg.count()) |>
Select(Get.count, :size => Agg.count())
print(render(q))
#=>
SELECT
"measurement_2"."count",
count(*) AS "size"
FROM (
SELECT count(*) AS "count"
FROM "measurement" AS "measurement_1"
GROUP BY "measurement_1"."measurement_concept_id"
) AS "measurement_2"
GROUP BY "measurement_2"."count"
=#
Group
accepts an empty list of keys.
q = From(person) |>
Group() |>
Select(Agg.count(), Agg.min(Get.year_of_birth), Agg.max(Get.year_of_birth))
print(render(q))
#=>
SELECT
count(*) AS "count",
min("person_1"."year_of_birth") AS "min",
max("person_1"."year_of_birth") AS "max"
FROM "person" AS "person_1"
=#
Group
with no keys and no aggregates creates a trivial subquery.
q = From(person) |>
Group()
print(render(q))
#-> SELECT NULL AS "_"
A SELECT DISTINCT
query must include all the keys even when they are not used downstream.
q = From(person) |>
Group(Get.year_of_birth) |>
Group() |>
Select(Agg.count())
print(render(q))
#=>
SELECT count(*) AS "count"
FROM (
SELECT DISTINCT "person_1"."year_of_birth"
FROM "person" AS "person_1"
) AS "person_2"
=#
Group
allows specifying the grouping sets, either with grouping mode indicators :cube
or :rollup
, or by explicit enumeration.
q = From(person) |>
Group(Get.year_of_birth, sets = :cube)
Define(Agg.count())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.year_of_birth, sets = :CUBE)
q2
end
=#
print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY CUBE("person_1"."year_of_birth")
=#
q = From(person) |>
Group(Get.year_of_birth, sets = [[1], Int[]])
Define(Agg.count())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.year_of_birth, sets = [[1], []])
q2
end
=#
print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY GROUPING SETS(("person_1"."year_of_birth"), ())
=#
Group
allows specifying grouping sets using names of the grouping keys.
q = From(person) |>
Group(Get.year_of_birth, Get.gender_concept_id,
sets = ([:year_of_birth], ["gender_concept_id"]))
Define(Agg.count())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |>
Group(Get.year_of_birth, Get.gender_concept_id, sets = [[1], [2]])
q2
end
=#
Group
will report when a grouping set refers to an unknown key.
From(person) |>
Group(Get.year_of_birth, sets = [[:gender_concept_id], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: `gender_concept_id` is not a valid key
=#
Group
complains about out-of-bound or incomplete grouping sets.
From(person) |>
Group(Get.year_of_birth, sets = [[1, 2], [1], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: `2` is out of bounds in:
let q1 = Group(Get.year_of_birth, sets = [[1, 2], [1], []])
q1
end
=#
From(person) |>
Group(Get.year_of_birth, Get.gender_concept_id,
sets = [[1], []])
#=>
ERROR: FunSQL.InvalidGroupingSetsError: missing keys `[:year_of_birth]` in:
let q1 = Group(Get.year_of_birth, Get.gender_concept_id, sets = [[1], []])
q1
end
=#
Group
allows specifying the name of a group field.
q = From(person) |>
Group(Get.year_of_birth, name = :person) |>
Define(Get.person |> Agg.count())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.year_of_birth, name = :person),
q3 = q2 |> Define(Get.person |> Agg.count())
q3
end
=#
print(render(q))
#=>
SELECT
"person_1"."year_of_birth",
count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#
Group
requires all keys to have unique aliases.
q = From(person) |>
Group(Get.person_id, Get.person_id)
#=>
ERROR: FunSQL.DuplicateLabelError: `person_id` is used more than once in:
Group(Get.person_id, Get.person_id)
=#
The name of group field must also be unique.
q = From(person) |>
Group(:group => Get.year_of_birth, name = :group)
#=>
ERROR: FunSQL.DuplicateLabelError: `group` is used more than once in:
Group(Get.year_of_birth |> As(:group), name = :group)
=#
Group
ensures that each aggregate expression gets a unique alias.
q = From(person) |>
Join(:visit_group => From(visit_occurrence) |>
Group(Get.person_id),
on = Get.person_id .== Get.visit_group.person_id) |>
Select(Get.person_id,
:max_visit_start_date =>
Get.visit_group |> Agg.max(Get.visit_start_date),
:max_visit_end_date =>
Get.visit_group |> Agg.max(Get.visit_end_date))
print(render(q))
#=>
SELECT
"person_1"."person_id",
"visit_group_1"."max" AS "max_visit_start_date",
"visit_group_1"."max_2" AS "max_visit_end_date"
FROM "person" AS "person_1"
JOIN (
SELECT
max("visit_occurrence_1"."visit_start_date") AS "max",
max("visit_occurrence_1"."visit_end_date") AS "max_2",
"visit_occurrence_1"."person_id"
FROM "visit_occurrence" AS "visit_occurrence_1"
GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_group_1" ON ("person_1"."person_id" = "visit_group_1"."person_id")
=#
Aggregate expressions can be applied to a filtered portion of a partition.
e = Agg.count(filter = Get.year_of_birth .> 1950)
#-> Agg.count(filter = (…))
display(e)
#-> Agg.count(filter = Fun.">"(Get.year_of_birth, 1950))
q = From(person) |> Group() |> Select(e)
print(render(q))
#=>
SELECT (count(*) FILTER (WHERE ("person_1"."year_of_birth" > 1950))) AS "count"
FROM "person" AS "person_1"
=#
It is an error for an aggregate expression to be used without Group
.
q = From(person) |> Select(Agg.count())
print(render(q))
#=>
ERROR: FunSQL.ReferenceError: aggregate expression requires Group or Partition in:
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Select(Agg.count())
q2
end
=#
Group
in a Join
expression shadows any previous applications of Group
.
qₚ = From(person)
qᵥ = From(visit_occurrence) |> Group(:visit_person_id => Get.person_id)
qₘ = From(measurement) |> Group(:measurement_person_id => Get.person_id)
q = qₚ |>
Join(qᵥ, on = Get.person_id .== Get.visit_person_id, left = true) |>
Join(qₘ, on = Get.person_id .== Get.measurement_person_id, left = true) |>
Select(Get.person_id, :count => Fun.coalesce(Agg.count(), 0))
print(render(q))
#=>
SELECT
"person_1"."person_id",
coalesce("measurement_2"."count", 0) AS "count"
FROM "person" AS "person_1"
LEFT JOIN (
SELECT DISTINCT "visit_occurrence_1"."person_id" AS "visit_person_id"
FROM "visit_occurrence" AS "visit_occurrence_1"
) AS "visit_occurrence_2" ON ("person_1"."person_id" = "visit_occurrence_2"."visit_person_id")
LEFT JOIN (
SELECT
count(*) AS "count",
"measurement_1"."person_id" AS "measurement_person_id"
FROM "measurement" AS "measurement_1"
GROUP BY "measurement_1"."person_id"
) AS "measurement_2" ON ("person_1"."person_id" = "measurement_2"."measurement_person_id")
=#
It is still possible to use an aggregate in the context of a Join when the corresponding Group
could be determined unambiguously.
qₚ = From(person)
qᵥ = From(visit_occurrence) |> Group(:visit_person_id => Get.person_id)
q = qₚ |>
Join(qᵥ, on = Get.person_id .== Get.visit_person_id, left = true) |>
Select(Get.person_id, :count => Fun.coalesce(Agg.count(), 0))
print(render(q))
#=>
SELECT
"person_1"."person_id",
coalesce("visit_occurrence_2"."count", 0) AS "count"
FROM "person" AS "person_1"
LEFT JOIN (
SELECT
count(*) AS "count",
"visit_occurrence_1"."person_id" AS "visit_person_id"
FROM "visit_occurrence" AS "visit_occurrence_1"
GROUP BY "visit_occurrence_1"."person_id"
) AS "visit_occurrence_2" ON ("person_1"."person_id" = "visit_occurrence_2"."visit_person_id")
=#
Partition
The Partition
constructor creates a subquery that partitions the rows by the given keys.
q = From(person) |>
Partition(Get.year_of_birth, order_by = [Get.month_of_birth, Get.day_of_birth])
#-> (…) |> Partition(…, order_by = […])
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |>
Partition(Get.year_of_birth,
order_by = [Get.month_of_birth, Get.day_of_birth])
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
A Partition
node can be created with @funsql
notation.
q = @funsql begin
from(person)
partition(year_of_birth, order_by = [month_of_birth, day_of_birth])
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |>
Partition(Get.year_of_birth,
order_by = [Get.month_of_birth, Get.day_of_birth])
q2
end
=#
Calculations across the rows of the partitions are performed by window functions.
q = From(person) |>
Partition(Get.gender_concept_id) |>
Select(Get.person_id, Agg.row_number())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Partition(Get.gender_concept_id),
q3 = q2 |> Select(Get.person_id, Agg.row_number())
q3
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
(row_number() OVER (PARTITION BY "person_1"."gender_concept_id")) AS "row_number"
FROM "person" AS "person_1"
=#
q = From(visit_occurrence) |>
Partition(Get.person_id) |>
Where(Get.visit_start_date .- Agg.min(Get.visit_start_date, filter = Get.visit_start_date .< Get.visit_end_date) .> 30) |>
Select(Get.person_id, Get.visit_start_date)
print(render(q))
#=>
SELECT
"visit_occurrence_2"."person_id",
"visit_occurrence_2"."visit_start_date"
FROM (
SELECT
"visit_occurrence_1"."person_id",
"visit_occurrence_1"."visit_start_date",
(min("visit_occurrence_1"."visit_start_date") FILTER (WHERE ("visit_occurrence_1"."visit_start_date" < "visit_occurrence_1"."visit_end_date")) OVER (PARTITION BY "visit_occurrence_1"."person_id")) AS "min"
FROM "visit_occurrence" AS "visit_occurrence_1"
) AS "visit_occurrence_2"
WHERE (("visit_occurrence_2"."visit_start_date" - "visit_occurrence_2"."min") > 30)
=#
A partition may specify the window frame.
q = From(person) |>
Group(Get.year_of_birth) |>
Partition(order_by = [Get.year_of_birth],
frame = (mode = :range, start = -1, finish = 1)) |>
Select(Get.year_of_birth, Agg.avg(Agg.count()))
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.year_of_birth),
q3 = q2 |>
Partition(order_by = [Get.year_of_birth],
frame = (mode = :RANGE, start = -1, finish = 1)),
q4 = q3 |> Select(Get.year_of_birth, Agg.avg(Agg.count()))
q4
end
=#
print(render(q))
#=>
SELECT
"person_1"."year_of_birth",
(avg(count(*)) OVER (ORDER BY "person_1"."year_of_birth" RANGE BETWEEN 1 PRECEDING AND 1 FOLLOWING)) AS "avg"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
=#
A window frame can be specified in @funsql
notation.
q = @funsql partition(order_by = [year_of_birth], frame = groups)
display(q)
#-> Partition(order_by = [Get.year_of_birth], frame = :GROUPS)
q = @funsql partition(order_by = [year_of_birth], frame = (mode = range, start = -1, finish = 1))
display(q)
#=>
Partition(order_by = [Get.year_of_birth],
frame = (mode = :RANGE, start = -1, finish = 1))
=#
q = @funsql partition(; order_by = [year_of_birth], frame = (mode = range, start = -Inf, finish = Inf, exclude = current_row))
display(q)
#=>
Partition(
order_by = [Get.year_of_birth],
frame =
(mode = :RANGE, start = -Inf, finish = Inf, exclude = :CURRENT_ROW))
=#
Partition
may assign an explicit name to the partition.
q = From(person) |>
Group(Get.gender_concept_id) |>
Partition(name = :all) |>
Define(:pct => 100 .* Agg.count() ./ (Get.all |> Agg.sum(Agg.count())))
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Group(Get.gender_concept_id),
q3 = q2 |> Partition(name = :all),
q4 = q3 |>
Define(Fun."/"(Fun."*"(100, Agg.count()),
Get.all |> Agg.sum(Agg.count())) |>
As(:pct))
q4
end
=#
print(render(q))
#=>
SELECT
"person_2"."gender_concept_id",
((100 * "person_2"."count") / (sum("person_2"."count") OVER ())) AS "pct"
FROM (
SELECT
"person_1"."gender_concept_id",
count(*) AS "count"
FROM "person" AS "person_1"
GROUP BY "person_1"."gender_concept_id"
) AS "person_2"
=#
This name may shadow an existing column.
q = From(location) |>
Partition(Get.location_id, name = :location_id)
print(render(q))
#=>
SELECT
"location_1"."city",
"location_1"."state"
FROM "location" AS "location_1"
=#
It is common to use several Partition
nodes in a row like in the following example which calculates non-overlapping visits.
q = From(visit_occurrence) |>
Partition(Get.person_id,
order_by = [Get.visit_start_date],
frame = (mode = :rows, start = -Inf, finish = -1)) |>
Define(:boundary => Agg.max(Get.visit_end_date)) |>
Define(:gap => Get.visit_start_date .- Get.boundary) |>
Define(:new => Fun.case(Get.gap .<= 0, 0, 1)) |>
Partition(Get.person_id,
order_by = [Get.visit_start_date, .- Get.new],
frame = :rows) |>
Define(:group => Agg.sum(Get.new)) |>
Group(Get.person_id, Get.group) |>
Define(:start_date => Agg.min(Get.visit_start_date),
:end_date => Agg.max(Get.visit_end_date)) |>
Select(Get.person_id, Get.start_date, Get.end_date)
print(render(q))
#=>
SELECT
"visit_occurrence_3"."person_id",
min("visit_occurrence_3"."visit_start_date") AS "start_date",
max("visit_occurrence_3"."visit_end_date") AS "end_date"
FROM (
SELECT
"visit_occurrence_2"."person_id",
(sum("visit_occurrence_2"."new") OVER (PARTITION BY "visit_occurrence_2"."person_id" ORDER BY "visit_occurrence_2"."visit_start_date", (- "visit_occurrence_2"."new") ROWS UNBOUNDED PRECEDING)) AS "group",
"visit_occurrence_2"."visit_start_date",
"visit_occurrence_2"."visit_end_date"
FROM (
SELECT
"visit_occurrence_1"."person_id",
"visit_occurrence_1"."visit_start_date",
"visit_occurrence_1"."visit_end_date",
(CASE WHEN (("visit_occurrence_1"."visit_start_date" - (max("visit_occurrence_1"."visit_end_date") OVER (PARTITION BY "visit_occurrence_1"."person_id" ORDER BY "visit_occurrence_1"."visit_start_date" ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING))) <= 0) THEN 0 ELSE 1 END) AS "new"
FROM "visit_occurrence" AS "visit_occurrence_1"
) AS "visit_occurrence_2"
) AS "visit_occurrence_3"
GROUP BY
"visit_occurrence_3"."person_id",
"visit_occurrence_3"."group"
=#
Join
The Join
constructor creates a subquery that correlates two nested subqueries.
q = From(person) |>
Join(:location => From(location),
on = Get.location_id .== Get.location.location_id,
left = true)
#-> (…) |> Join(…)
display(q)
#=>
let person = SQLTable(:person, …),
location = SQLTable(:location, …),
q1 = From(person),
q2 = From(location),
q3 = q1 |>
Join(q2 |> As(:location),
Fun."="(Get.location_id, Get.location.location_id),
left = true)
q3
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
LEFT JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#
LEFT JOIN
is commonly used and has its own constructor.
q = From(person) |>
LeftJoin(:location => From(location),
on = Get.location_id .== Get.location.location_id)
display(q)
#=>
let person = SQLTable(:person, …),
location = SQLTable(:location, …),
q1 = From(person),
q2 = From(location),
q3 = q1 |>
Join(q2 |> As(:location),
Fun."="(Get.location_id, Get.location.location_id),
left = true)
q3
end
=#
Various Join
nodes can be created with @funsql
notation.
q = @funsql begin
from(person)
join(location => from(location),
on = location_id == location.location_id,
left = true)
end
display(q)
#=>
let q1 = From(:person),
q2 = From(:location),
q3 = q1 |>
Join(q2 |> As(:location),
Fun."="(Get.location_id, Get.location.location_id),
left = true)
q3
end
=#
q = @funsql begin
from(person)
left_join(location => from(location),
location_id == location.location_id)
end
display(q)
#=>
let q1 = From(:person),
q2 = From(:location),
q3 = q1 |>
Join(q2 |> As(:location),
Fun."="(Get.location_id, Get.location.location_id),
left = true)
q3
end
=#
q = @funsql begin
from(person)
cross_join(other => from(person))
end
display(q)
#=>
let q1 = From(:person),
q2 = From(:person),
q3 = q1 |> Join(q2 |> As(:other), true)
q3
end
=#
Nested subqueries that are combined with Join
may fail to collapse.
q = From(person) |>
Where(Get.year_of_birth .> 1970) |>
Join(:location => From(location) |>
Where(Get.state .== "IL"),
on = (Get.location_id .== Get.location.location_id)) |>
Select(Get.person_id, Get.location.city)
print(render(q))
#=>
SELECT
"person_2"."person_id",
"location_2"."city"
FROM (
SELECT
"person_1"."person_id",
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1970)
) AS "person_2"
JOIN (
SELECT
"location_1"."city",
"location_1"."location_id"
FROM "location" AS "location_1"
WHERE ("location_1"."state" = 'IL')
) AS "location_2" ON ("person_2"."location_id" = "location_2"."location_id")
=#
An outer Join
does not collapse its branches when doing so may change the values of unmatched rows.
join_q(; left = false, right = false) =
From(:cohort1) |> Define(:n_cohort => 1) |> As(:cohort1) |>
Join(From(:cohort2) |> Define(:n_cohort => 1) |> As(:cohort2),
on = Get.cohort1.person_id .== Get.cohort2.person_id,
left = left,
right = right) |>
Select(:person_id => Fun.coalesce(Get.cohort1.person_id, Get.cohort2.person_id),
:n_cohort => Fun.coalesce(Get.cohort1.n_cohort, 0) .+ Fun.coalesce(Get.cohort2.n_cohort, 0)) |>
With(:cohort1 => From(person) |> Where(Get.year_of_birth .> 1970),
:cohort2 => From(person) |> Where(Get.year_of_birth .< 1990))
print(render(join_q()))
#=>
WITH "cohort1_1" ("person_id") AS (
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
SELECT "person_2"."person_id"
FROM "person" AS "person_2"
WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
coalesce("cohort1_2"."person_id", "cohort2_2"."person_id") AS "person_id",
(coalesce(1, 0) + coalesce(1, 0)) AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
JOIN "cohort2_1" AS "cohort2_2" ON ("cohort1_2"."person_id" = "cohort2_2"."person_id")
=#
print(render(join_q(left = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
SELECT "person_2"."person_id"
FROM "person" AS "person_2"
WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
coalesce("cohort1_2"."person_id", "cohort2_3"."person_id") AS "person_id",
(coalesce(1, 0) + coalesce("cohort2_3"."n_cohort", 0)) AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
LEFT JOIN (
SELECT
"cohort2_2"."person_id",
1 AS "n_cohort"
FROM "cohort2_1" AS "cohort2_2"
) AS "cohort2_3" ON ("cohort1_2"."person_id" = "cohort2_3"."person_id")
=#
print(render(join_q(right = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
SELECT "person_2"."person_id"
FROM "person" AS "person_2"
WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
coalesce("cohort1_3"."person_id", "cohort2_2"."person_id") AS "person_id",
(coalesce("cohort1_3"."n_cohort", 0) + coalesce(1, 0)) AS "n_cohort"
FROM (
SELECT
"cohort1_2"."person_id",
1 AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
) AS "cohort1_3"
RIGHT JOIN "cohort2_1" AS "cohort2_2" ON ("cohort1_3"."person_id" = "cohort2_2"."person_id")
=#
print(render(join_q(left = true, right = true)))
#=>
WITH "cohort1_1" ("person_id") AS (
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 1970)
),
"cohort2_1" ("person_id") AS (
SELECT "person_2"."person_id"
FROM "person" AS "person_2"
WHERE ("person_2"."year_of_birth" < 1990)
)
SELECT
coalesce("cohort1_3"."person_id", "cohort2_3"."person_id") AS "person_id",
(coalesce("cohort1_3"."n_cohort", 0) + coalesce("cohort2_3"."n_cohort", 0)) AS "n_cohort"
FROM (
SELECT
"cohort1_2"."person_id",
1 AS "n_cohort"
FROM "cohort1_1" AS "cohort1_2"
) AS "cohort1_3"
FULL JOIN (
SELECT
"cohort2_2"."person_id",
1 AS "n_cohort"
FROM "cohort2_1" AS "cohort2_2"
) AS "cohort2_3" ON ("cohort1_3"."person_id" = "cohort2_3"."person_id")
=#
Join
can be applied to correlated subqueries.
ql(person_id) =
From(visit_occurrence) |>
Where(Get.person_id .== Var.PERSON_ID) |>
Partition(order_by = [Get.visit_start_date]) |>
Where(Agg.row_number() .== 1) |>
Bind(:PERSON_ID => person_id)
print(render(ql(1)))
#=>
SELECT
"visit_occurrence_2"."visit_occurrence_id",
"visit_occurrence_2"."person_id",
"visit_occurrence_2"."visit_start_date",
"visit_occurrence_2"."visit_end_date"
FROM (
SELECT
"visit_occurrence_1"."visit_occurrence_id",
"visit_occurrence_1"."person_id",
"visit_occurrence_1"."visit_start_date",
"visit_occurrence_1"."visit_end_date",
(row_number() OVER (ORDER BY "visit_occurrence_1"."visit_start_date")) AS "row_number"
FROM "visit_occurrence" AS "visit_occurrence_1"
WHERE ("visit_occurrence_1"."person_id" = 1)
) AS "visit_occurrence_2"
WHERE ("visit_occurrence_2"."row_number" = 1)
=#
q = From(person) |>
Join(:visit => ql(Get.person_id), on = true) |>
Select(Get.person_id,
Get.visit.visit_occurrence_id,
Get.visit.visit_start_date)
print(render(q))
#=>
SELECT
"person_1"."person_id",
"visit_1"."visit_occurrence_id",
"visit_1"."visit_start_date"
FROM "person" AS "person_1"
CROSS JOIN LATERAL (
SELECT
"visit_occurrence_2"."visit_occurrence_id",
"visit_occurrence_2"."visit_start_date"
FROM (
SELECT
"visit_occurrence_1"."visit_occurrence_id",
"visit_occurrence_1"."visit_start_date",
(row_number() OVER (ORDER BY "visit_occurrence_1"."visit_start_date")) AS "row_number"
FROM "visit_occurrence" AS "visit_occurrence_1"
WHERE ("visit_occurrence_1"."person_id" = "person_1"."person_id")
) AS "visit_occurrence_2"
WHERE ("visit_occurrence_2"."row_number" = 1)
) AS "visit_1"
=#
The LATERAL
keyword is omitted when the join branch is reduced to a function call.
q = From(concept) |>
Join(
From(Fun.string_to_table(Get.concept_name, " "), columns = [:word]),
on = true) |>
Group(Get.word)
print(render(q))
#=>
SELECT DISTINCT "string_to_table_1"."word"
FROM "concept" AS "concept_1"
CROSS JOIN string_to_table("concept_1"."concept_name", ' ') AS "string_to_table_1" ("word")
=#
Some database backends require LATERAL
even in this case.
print(render(q, dialect = :spark))
#=>
SELECT DISTINCT `string_to_table_1`.`word`
FROM `concept` AS `concept_1`
CROSS JOIN LATERAL string_to_table(`concept_1`.`concept_name`, ' ') AS `string_to_table_1` (`word`)
=#
An optional Join
is omitted when the output contains no data from its right branch.
q = From(person) |>
LeftJoin(:location => From(location),
on = Get.location_id .== Get.location.location_id,
optional = true)
display(q)
#=>
let person = SQLTable(:person, …),
location = SQLTable(:location, …),
q1 = From(person),
q2 = From(location),
q3 = q1 |>
Join(q2 |> As(:location),
Fun."="(Get.location_id, Get.location.location_id),
left = true,
optional = true)
q3
end
=#
print(render(q |> Select(Get.year_of_birth)))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
=#
print(render(q |> Select(Get.year_of_birth, Get.location.state)))
#=>
SELECT
"person_1"."year_of_birth",
"location_1"."state"
FROM "person" AS "person_1"
LEFT JOIN "location" AS "location_1" ON ("person_1"."location_id" = "location_1"."location_id")
=#
Order
The Order
constructor creates a subquery for sorting the data.
q = From(person) |>
Order(Get.year_of_birth)
#-> (…) |> Order(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Order(Get.year_of_birth)
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."year_of_birth"
=#
An Order
node can be created with @funsql
notation.
q = @funsql begin
from(person)
order(year_of_birth)
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Order(Get.year_of_birth)
q2
end
=#
Order
is often used together with Limit
.
q = From(person) |>
Order(Get.year_of_birth) |>
Limit(10) |>
Order(Get.person_id)
print(render(q))
#=>
SELECT
"person_2"."person_id",
⋮
"person_2"."location_id"
FROM (
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."year_of_birth"
FETCH FIRST 10 ROWS ONLY
) AS "person_2"
ORDER BY "person_2"."person_id"
=#
An Order
without columns to sort by is a no-op.
q = From(person) |>
Order(by = [])
#-> (…) |> Order(by = [])
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
It is possible to specify ascending or descending order of the sort column.
q = From(person) |>
Order(Get.year_of_birth |> Desc(nulls = :first),
Get.person_id |> Asc())
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |>
Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
Get.person_id |> Asc())
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY
"person_1"."year_of_birth" DESC NULLS FIRST,
"person_1"."person_id" ASC
=#
A generic Sort
constructor could also be used for this purpose.
q = From(person) |>
Order(Get.year_of_birth |> Sort(:desc, nulls = :first),
Get.person_id |> Sort(:asc))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY
"person_1"."year_of_birth" DESC NULLS FIRST,
"person_1"."person_id" ASC
=#
Sort decorations can be created with @funsql
.
q = @funsql begin
from(person)
order(year_of_birth.desc(nulls = first), person_id.asc())
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |>
Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
Get.person_id |> Asc())
q2
end
=#
q = @funsql begin
from(person)
order(year_of_birth.sort(desc, nulls = first), person_id.sort(asc))
end
display(q)
#=>
let q1 = From(:person),
q2 = q1 |>
Order(Get.year_of_birth |> Desc(nulls = :NULLS_FIRST),
Get.person_id |> Asc())
q2
end
=#
Limit
The Limit
constructor creates a subquery that takes a fixed-size slice of the dataset.
q = From(person) |>
Order(Get.person_id) |>
Limit(10)
#-> (…) |> Limit(10)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Order(Get.person_id),
q3 = q2 |> Limit(10)
q3
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
FETCH FIRST 10 ROWS ONLY
=#
Both the offset and the limit can be specified.
q = From(person) |>
Order(Get.person_id) |>
Limit(100, 10)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Order(Get.person_id),
q3 = q2 |> Limit(100, 10)
q3
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
OFFSET 100 ROWS
FETCH NEXT 10 ROWS ONLY
=#
q = From(person) |>
Order(Get.person_id) |>
Limit(101:110)
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
ORDER BY "person_1"."person_id"
OFFSET 100 ROWS
FETCH NEXT 10 ROWS ONLY
=#
q = From(person) |>
Limit(offset = 100) |>
Limit(limit = 10)
print(render(q))
#=>
SELECT
"person_2"."person_id",
⋮
"person_2"."location_id"
FROM (
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
OFFSET 100 ROWS
) AS "person_2"
FETCH FIRST 10 ROWS ONLY
=#
q = From(person) |>
Limit()
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
=#
A Limit
node can be created with @funsql
notation.
q = @funsql from(person).order(person_id).limit(10)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Order(Get.person_id),
q3 = q2 |> Limit(10)
q3
end
=#
q = @funsql from(person).order(person_id).limit(100, 10)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Order(Get.person_id),
q3 = q2 |> Limit(100, 10)
q3
end
=#
q = @funsql from(person).order(person_id).limit(101:110)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Order(Get.person_id),
q3 = q2 |> Limit(100, 10)
q3
end
=#
Select
The Select
constructor creates a subquery that fixes the output columns.
q = From(person) |>
Select(Get.person_id)
#-> (…) |> Select(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Select(Get.person_id)
q2
end
=#
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
=#
A Select
node can be created with @funsql
notation.
q = @funsql from(person).select(person_id)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Select(Get.person_id)
q2
end
=#
Select
does not have to be the last subquery in a chain, but it always creates a complete subquery.
q = From(person) |>
Select(Get.year_of_birth) |>
Where(Fun.">"(Get.year_of_birth, 2000))
print(render(q))
#=>
SELECT "person_2"."year_of_birth"
FROM (
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
) AS "person_2"
WHERE ("person_2"."year_of_birth" > 2000)
=#
Select
requires all columns in the list to have unique aliases.
q = From(person) |>
Select(Get.person_id, Get.person_id)
#=>
ERROR: FunSQL.DuplicateLabelError: `person_id` is used more than once in:
Select(Get.person_id, Get.person_id)
=#
Where
The Where
constructor creates a subquery that filters by the given condition.
q = From(person) |>
Where(Fun.">"(Get.year_of_birth, 2000))
#-> (…) |> Where(…)
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000))
q2
end
=#
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#
A Where
node can be created with @funsql
notation.
q = @funsql from(person).filter(year_of_birth > 2000)
display(q)
#=>
let q1 = From(:person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000))
q2
end
=#
Several Where
operations in a row are collapsed to a single WHERE
clause.
q = From(person) |>
Where(Fun.">"(Get.year_of_birth, 2000)) |>
Where(Fun."<"(Get.year_of_birth, 2020)) |>
Where(Fun."<>"(Get.year_of_birth, 2010))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE
("person_1"."year_of_birth" > 2000) AND
("person_1"."year_of_birth" < 2020) AND
("person_1"."year_of_birth" <> 2010)
=#
q = From(person) |>
Where(Get.year_of_birth .!= 2010) |>
Where(Fun.and(Get.year_of_birth .> 2000, Get.year_of_birth .< 2020))
print(render(q))
#=>
SELECT
"person_1"."person_id",
⋮
"person_1"."location_id"
FROM "person" AS "person_1"
WHERE
("person_1"."year_of_birth" <> 2010) AND
("person_1"."year_of_birth" > 2000) AND
("person_1"."year_of_birth" < 2020)
=#
Where
that follows Group
subquery is transformed to a HAVING
clause.
q = From(person) |>
Group(Get.year_of_birth) |>
Where(Agg.count() .> 10)
print(render(q))
#=>
SELECT "person_1"."year_of_birth"
FROM "person" AS "person_1"
GROUP BY "person_1"."year_of_birth"
HAVING (count(*) > 10)
=#
q = From(person) |>
Group(Get.gender_concept_id) |>
Where(Agg.count(filter = Get.year_of_birth .== 2010) .> 10) |>
Where(Agg.count(filter = Get.year_of_birth .== 2000) .< 100) |>
Where(Fun.and(Agg.count(filter = Get.year_of_birth .== 1933) .!= 33,
Agg.count(filter = Get.year_of_birth .== 1966) .!= 66))
print(render(q))
#=>
SELECT "person_1"."gender_concept_id"
FROM "person" AS "person_1"
GROUP BY "person_1"."gender_concept_id"
HAVING
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2010))) > 10) AND
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2000))) < 100) AND
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 1933))) <> 33) AND
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 1966))) <> 66)
=#
q = From(person) |>
Group(Get.gender_concept_id) |>
Where(Fun.or(Agg.count(filter = Get.year_of_birth .== 2010) .> 10,
Agg.count(filter = Get.year_of_birth .== 2000) .< 100))
print(render(q))
#=>
SELECT "person_1"."gender_concept_id"
FROM "person" AS "person_1"
GROUP BY "person_1"."gender_concept_id"
HAVING
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2010))) > 10) OR
((count(*) FILTER (WHERE ("person_1"."year_of_birth" = 2000))) < 100)
=#
Highlighting
To highlight a node on the output, wrap it with Highlight
.
q = From(person) |>
Highlight(:underline) |>
Where(Fun.">"(Get.year_of_birth |> Highlight(:bold), 2000) |>
Highlight(:white)) |>
Select(Get.person_id) |>
Highlight(:green)
#-> (…) |> Highlight(:green)
When the query is displayed on a color terminal, the affected node is highlighted.
display(q)
#=>
let person = SQLTable(:person, …),
q1 = From(person),
q2 = q1 |> Where(Fun.">"(Get.year_of_birth, 2000)),
q3 = q2 |> Select(Get.person_id)
q3
end
=#
The Highlight
node does not otherwise affect processing of the query.
print(render(q))
#=>
SELECT "person_1"."person_id"
FROM "person" AS "person_1"
WHERE ("person_1"."year_of_birth" > 2000)
=#
A Highlight
node can be created with @funsql
notation.
q = @funsql from(person).highlight(red)
display(q)
#=>
let q1 = From(:person)
q1
end
=#
Debugging
Enable debug logging to get some insight on how FunSQL translates a query object into SQL. Set the JULIA_DEBUG
environment variable to the name of a translation stage and render()
will print the result of this stage.
Consider the following query.
q = From(person) |>
Where(Get.year_of_birth .<= 2000) |>
Join(:location => From(location) |>
Where(Get.state .== "IL"),
on = (Get.location_id .== Get.location.location_id)) |>
Join(:visit_group => From(visit_occurrence) |>
Group(Get.person_id),
on = (Get.person_id .== Get.visit_group.person_id),
left = true) |>
Select(Get.person_id,
:max_visit_start_date =>
Get.visit_group |> Agg.max(Get.visit_start_date))
At the first stage of the translation, render()
resolves table references and determines node types.
#? VERSION >= v"1.7" # https://github.com/JuliaLang/julia/issues/26798
withenv("JULIA_DEBUG" => "FunSQL.resolve") do
render(q)
end;
#=>
┌ Debug: FunSQL.resolve
│ let person = SQLTable(:person, …),
│ location = SQLTable(:location, …),
│ visit_occurrence = SQLTable(:visit_occurrence, …),
│ q1 = FromTable(table = person),
│ q2 = Resolved(RowType(:person_id => ScalarType(),
│ :gender_concept_id => ScalarType(),
│ :year_of_birth => ScalarType(),
│ :month_of_birth => ScalarType(),
│ :day_of_birth => ScalarType(),
│ :birth_datetime => ScalarType(),
│ :location_id => ScalarType()),
│ over = q1) |>
│ Where(Resolved(ScalarType(),
│ over = Fun."<="(Resolved(ScalarType(),
│ over = Get.year_of_birth),
│ Resolved(ScalarType(), over = 2000)))),
⋮
│ WithContext(over = Resolved(RowType(:person_id => ScalarType(),
│ :max_visit_start_date => ScalarType()),
│ over = q9),
│ catalog = SQLCatalog(dialect = SQLDialect(), cache = nothing))
│ end
└ @ FunSQL …
=#
Next, render()
determines, for each tabular node, the data that it must produce.
#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.link") do
render(q)
end;
#=>
┌ Debug: FunSQL.link
│ let person = SQLTable(:person, …),
│ location = SQLTable(:location, …),
│ visit_occurrence = SQLTable(:visit_occurrence, …),
│ q1 = FromTable(table = person),
│ q2 = Get.person_id,
│ q3 = Get.person_id,
│ q4 = Get.location_id,
│ q5 = Get.year_of_birth,
│ q6 = Linked([q2, q3, q4, q5], 3, over = q1),
⋮
│ WithContext(over = q33,
│ catalog = SQLCatalog(dialect = SQLDialect(), cache = nothing))
│ end
└ @ FunSQL …
=#
On the next stage, the query object is converted to a SQL syntax tree.
#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.translate") do
render(q)
end;
#=>
┌ Debug: FunSQL.translate
│ WITH_CONTEXT(
│ over = ID(:person) |>
│ AS(:person_1) |>
│ FROM() |>
│ WHERE(FUN("<=", ID(:person_1) |> ID(:year_of_birth), LIT(2000))) |>
│ SELECT(ID(:person_1) |> ID(:person_id),
│ ID(:person_1) |> ID(:location_id)) |>
│ AS(:person_2) |>
│ FROM() |>
│ JOIN(ID(:location) |>
│ AS(:location_1) |>
│ FROM() |>
│ WHERE(FUN("=", ID(:location_1) |> ID(:state), LIT("IL"))) |>
│ SELECT(ID(:location_1) |> ID(:location_id)) |>
│ AS(:location_2),
│ FUN("=",
│ ID(:person_2) |> ID(:location_id),
│ ID(:location_2) |> ID(:location_id))) |>
│ JOIN(ID(:visit_occurrence) |>
│ AS(:visit_occurrence_1) |>
│ FROM() |>
│ GROUP(ID(:visit_occurrence_1) |> ID(:person_id)) |>
│ SELECT(AGG("max",
│ ID(:visit_occurrence_1) |> ID(:visit_start_date)) |>
│ AS(:max),
│ ID(:visit_occurrence_1) |> ID(:person_id)) |>
│ AS(:visit_group_1),
│ FUN("=",
│ ID(:person_2) |> ID(:person_id),
│ ID(:visit_group_1) |> ID(:person_id)),
│ left = true) |>
│ SELECT(ID(:person_2) |> ID(:person_id),
│ ID(:visit_group_1) |> ID(:max) |> AS(:max_visit_start_date)),
│ columns = [SQLColumn(:person_id), SQLColumn(:max_visit_start_date)])
└ @ FunSQL …
=#
Finally, the SQL tree is serialized into SQL.
#? VERSION >= v"1.7"
withenv("JULIA_DEBUG" => "FunSQL.serialize") do
render(q)
end;
#=>
┌ Debug: FunSQL.serialize
│ SQLString(
│ """
│ SELECT
│ "person_2"."person_id",
│ "visit_group_1"."max" AS "max_visit_start_date"
│ FROM (
│ SELECT
│ "person_1"."person_id",
│ "person_1"."location_id"
│ FROM "person" AS "person_1"
│ WHERE ("person_1"."year_of_birth" <= 2000)
│ ) AS "person_2"
│ JOIN (
│ SELECT "location_1"."location_id"
│ FROM "location" AS "location_1"
│ WHERE ("location_1"."state" = 'IL')
│ ) AS "location_2" ON ("person_2"."location_id" = "location_2"."location_id")
│ LEFT JOIN (
│ SELECT
│ max("visit_occurrence_1"."visit_start_date") AS "max",
│ "visit_occurrence_1"."person_id"
│ FROM "visit_occurrence" AS "visit_occurrence_1"
│ GROUP BY "visit_occurrence_1"."person_id"
│ ) AS "visit_group_1" ON ("person_2"."person_id" = "visit_group_1"."person_id")""",
│ columns = [SQLColumn(:person_id), SQLColumn(:max_visit_start_date)])
└ @ FunSQL …
=#