Skip to content

Commit

Permalink
Merge pull request #97 from JuliaData/ast/ext_singlelinkedlist
Browse files Browse the repository at this point in the history
EXPRSXP, LANGSXP, DOTSXP support
  • Loading branch information
alyst authored Aug 10, 2023
2 parents 071d2f6 + eb08c82 commit 9d0d02e
Show file tree
Hide file tree
Showing 12 changed files with 189 additions and 112 deletions.
19 changes: 11 additions & 8 deletions src/DictoVec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -8,18 +8,21 @@ struct DictoVec{T} <: AbstractVector{T}
name2index::Dict{RString, Int}
index2name::Vector{Union{RString, Nothing}}

function DictoVec(data::AbstractVector{T}, names::AbstractVector{<:AbstractString} = Vector{RString}()) where T
if !isempty(names) && length(data) != length(names)
function DictoVec(data::AbstractVector{T},
names::Union{AbstractVector{<:AbstractString}, Nothing} = nothing) where T
if (names !== nothing) && !isempty(names) && length(data) != length(names)
throw(DimensionMismatch("Lengths of data ($(length(data))) and element names ($(length(names))) differ"))
end
n2i = Dict{RString, Int}()
i2n = fill!(similar(data, Union{RString, Nothing}), nothing)
@inbounds for (i, k) in enumerate(names)
if k != "" && k !== nothing
n2i[k] = i
i2n[i] = k
else
i2n[i] = nothing
if names !== nothing
@inbounds for (i, k) in enumerate(names)
if k != "" && k !== nothing
n2i[k] = i
i2n[i] = k
else
i2n[i] = nothing
end
end
end
new{T}(data, n2i, i2n)
Expand Down
115 changes: 59 additions & 56 deletions src/readers.jl
Original file line number Diff line number Diff line change
Expand Up @@ -50,10 +50,11 @@ function readstring(ctx::RDAContext, fl::RDATag)
end

function readlist(ctx::RDAContext, fl::RDATag)
@assert sxtype(fl) == VECSXP
@assert sxtype(fl) == VECSXP || sxtype(fl) == EXPRSXP
n = readlength(ctx.io)
RList(RSEXPREC[readitem(ctx) for i in 1:n],
readattrs(ctx, fl))
RVector{RSEXPREC, sxtype(fl)}(
RSEXPREC[readitem(ctx) for _ in 1:n],
readattrs(ctx, fl))
end

function readref(ctx::RDAContext, fl::RDATag)
Expand Down Expand Up @@ -108,69 +109,68 @@ function readpackage(ctx::RDAContext, fl::RDATag)
end

# reads single-linked lists R objects
function readpairedobjects(ctx::RDAContext, fl::RDATag)
res = RPairList(readattrs(ctx, fl))
ifl = fl # RDATag for the list item
while true
if sxtype(ifl) == sxtype(fl)
if hastag(ifl)
tag = readitem(ctx)
if isa(tag, RSymbol)
nm = tag.displayname
else
nm = emptyhashkey
end
else
nm = emptyhashkey
end
item = readitem(ctx)
push!(res, item, nm)
ifl = readuint32(ctx.io)
# the type of the result is RSpecialList{S},
# where S is the SXTYPE of the R object
function readsinglelinkedlist(::Val{S}, ctx::RDAContext, fl::RDATag) where S
res = RSpecialList{S}(readattrs(ctx, fl))
tag = hastag(fl) ? readitem(ctx) : RNull()
carfl = fl # RDATag for the CAR of the current list link
while carfl != NILVALUE_SXP
if !isempty(res) # head tag was read already
carfl = readuint32(ctx.io) # CAR container type
(carfl == NILVALUE_SXP) && break
# read the item attributes
# FIXME what to do with the attributes?
(sxtype(ifl) == sxtype(fl)) && readattrs(ctx, ifl)
elseif sxtype(ifl) == NILVALUE_SXP # end of list
break
else # end of list (not a single-linked list item)
# it's not clear whether it's an error of handling AltReps
# or a feature of AltReps (it only occurs within AltReps)
# normally pairlists should be terminated by NILVALUE_SXP
@warn "0x$(string(ifl, base=16)) element in a 0x$(string(fl, base=16)) list, assuming it's the last element"
item = readitem(ctx, ifl)
push!(res, item, emptyhashkey)
break
attrs = readattrs(ctx, carfl)
tag = hastag(carfl) ? readitem(ctx) : RNull()
end
if isa(tag, RSymbol)
nm = tag.displayname
else
isa(tag, RNull) || @warn "$(sxtypelabel(sxtype(fl))) link has unexpected tag type $(sxtypelabel(tag))"
nm = emptyhashkey
end
iscontainer = sxtype(carfl) == sxtype(fl) || sxtype(carfl) == LISTSXP
#if !iscontainer
# @warn "$(sxtypelabel(sxtype(carfl))) CAR element in a $(sxtypelabel(res)) list"
#end
item = iscontainer ?
readitem(ctx) : # item is inside the list link
readitem(ctx, carfl) # the link is the item
#if length(res) > 0 && sxtype(last(res.items)) != sxtype(item)
# # the items in the list are not required to be of the same type
# @warn "$(sxtypelabel(item)) element in a $(sxtypelabel(res)) list, previous was $(sxtypelabel(last(res.items)))"
#end
push!(res, item, nm)
iscontainer || break # no CDR, end of the list
end

return res
end

function readpairlist(ctx::RDAContext, fl::RDATag)
@assert sxtype(fl) == LISTSXP
readpairedobjects(ctx, fl)
end

function readlang(ctx::RDAContext, fl::RDATag)
@assert sxtype(fl) == LANGSXP
readpairedobjects(ctx, fl)
function readsinglelinkedlist(ctx::RDAContext, fl::RDATag)
S = sxtype(fl)
# check the SXTYPE is supported
@assert S == LISTSXP || S == LANGSXP || S == DOTSXP
readsinglelinkedlist(Val(S), ctx, fl)
end

function readclosure(ctx::RDAContext, fl::RDATag)
@assert sxtype(fl) == CLOSXP
res = RClosure(readattrs(ctx, fl))
hastag(fl) && (res.env = readitem(ctx))
res.formals = readitem(ctx)
res.body = readitem(ctx)
return res
attrs = readattrs(ctx, fl)
env = hastag(fl) ? readitem(ctx) : RNull()
formals = readitem(ctx)
body = readitem(ctx)
return RClosure(formals, body, env, attrs)
end

function readpromise(ctx::RDAContext, fl::RDATag)
@assert sxtype(fl) == PROMSXP
res = RPromise(readattrs(ctx, fl))
hastag(fl) && (res.env = readitem(ctx))
res.value = readitem(ctx)
res.expr = readitem(ctx)
return res
attrs = readattrs(ctx, fl)
env = hastag(fl) ? readitem(ctx) : RNull()
value = readitem(ctx)
expr = readitem(ctx)
return RPromise(value, expr, env, attrs)
end

function readraw(ctx::RDAContext, fl::RDATag)
Expand Down Expand Up @@ -274,7 +274,7 @@ function readaltrep(ctx::RDAContext, fl::RDATag)
end

function readunsupported(ctx::RDAContext, fl::RDATag)
throw(UnsupportedROBJ(sxtype(fl), "Reading SEXPREC of type $(sxtype(fl)) ($(SXTypes[sxtype(fl)].name)) is not supported"))
throw(UnsupportedROBJ(sxtype(fl), "Reading SEXPREC of type $(sxtypelabel(sxtype(fl))) is not supported"))
end

"""
Expand All @@ -291,11 +291,11 @@ Maps R type id (`SXType`) to its `SXTypeInfo`.
const SXTypes = Dict{SXType, SXTypeInfo}(
NILSXP => SXTypeInfo("NULL",readdummy),
SYMSXP => SXTypeInfo("Symbol",readsymbol),
LISTSXP => SXTypeInfo("Pairlist",readpairlist),
LISTSXP => SXTypeInfo("Pairlist",readsinglelinkedlist),
CLOSXP => SXTypeInfo("Closure",readclosure),
ENVSXP => SXTypeInfo("Environment",readenv),
PROMSXP => SXTypeInfo("Promise",readpromise),
LANGSXP => SXTypeInfo("Lang",readlang),
LANGSXP => SXTypeInfo("Lang",readsinglelinkedlist),
SPECIALSXP => SXTypeInfo("Special",readbuiltin),
BUILTINSXP => SXTypeInfo("Builtin",readbuiltin),
CHARSXP => SXTypeInfo("Char",readunsupported),
Expand All @@ -304,10 +304,10 @@ const SXTypes = Dict{SXType, SXTypeInfo}(
REALSXP => SXTypeInfo("Real",readnumeric),
CPLXSXP => SXTypeInfo("Complex",readcomplex),
STRSXP => SXTypeInfo("String",readstring),
DOTSXP => SXTypeInfo("Dot",readunsupported),
DOTSXP => SXTypeInfo("Dot",readsinglelinkedlist),
ANYSXP => SXTypeInfo("Any",readunsupported),
VECSXP => SXTypeInfo("List",readlist),
EXPRSXP => SXTypeInfo("Expr",readunsupported),
EXPRSXP => SXTypeInfo("Expr",readlist),
BCODESXP => SXTypeInfo("ByteCode",readbytecode),
EXTPTRSXP => SXTypeInfo("XPtr",readextptr),
WEAKREFSXP => SXTypeInfo("WeakRef",readunsupported),
Expand All @@ -334,9 +334,12 @@ const SXTypes = Dict{SXType, SXTypeInfo}(
ALTREP_SXP => SXTypeInfo("AltRep",readaltrep)
)

sxtypelabel(sxt::SXType) = "$(haskey(SXTypes, sxt) ? SXTypes[sxt].name : "Unknown") (0x$(string(sxt, base=16)))"
sxtypelabel(sxt::RSEXPREC) = sxtypelabel(sxtype(sxt))

function readitem(ctx::RDAContext, fl::RDATag)
sxt = sxtype(fl)
haskey(SXTypes, sxt) || throw(UnsupportedROBJ(sxt, "encountered unknown SEXPREC type $sxt"))
haskey(SXTypes, sxt) || throw(UnsupportedROBJ(sxt, "encountered unknown SEXPREC type 0x$(string(fl, base=16))"))
sxtinfo = SXTypes[sxt]
return sxtinfo.reader(ctx, fl)
### Should not occur at the top level
Expand Down
108 changes: 62 additions & 46 deletions src/sxtypes.jl
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,10 @@ abstract type RVEC{T, S} <: ROBJ{S} end
rvec_eltype(::Type{<:RVEC{T}}) where T = T
rvec_eltype(v::RVEC) = rvec_eltype(typeof(v))

Base.length(rl::RVEC) = length(rl.data)
Base.size(rv::RVEC) = size(rv.data) # overriden for RList to handle data.frame
Base.isempty(rv::RVEC) = isempty(rv.data)

"""
R vector object.
"""
Expand Down Expand Up @@ -145,56 +149,84 @@ end

const RStringVector = RNullableVector{RString,STRSXP}
const RList = RVector{RSEXPREC,VECSXP} # "list" in R == Julia cell array
const RExprList = RVector{RSEXPREC,EXPRSXP} # expression "list"

Base.size(rl::RList) = isdataframe(rl) ? (length(rl.data[1]), length(rl.data)) : size(rl.data)

# R objects without body (empty environments, missing args etc)
struct RDummy{S} <: RSEXPREC{S}
end

const RNull = RDummy{NILSXP}
const RGlobalEnv = RDummy{GLOBALENV_SXP}
const RBaseEnv = RDummy{BASEENV_SXP}
const REmptyEnv = RDummy{EMPTYENV_SXP}

mutable struct REnvironment <: ROBJ{ENVSXP}
enclosed::RSEXPREC
frame::RSEXPREC
hashtab::RSEXPREC
attr::Hash

REnvironment() = new(RNull(), RNull(), RNull(), Hash())
end

mutable struct RNamespace <: RSEXPREC{NAMESPACESXP}
name::Vector{RString}
end

mutable struct RPackage <: RSEXPREC{PACKAGESXP}
name::Vector{RString}
end

# any types that could be used as R environment in promises and closures
const REnvTypes = Union{REnvironment, RNamespace, RDummy}

"""
Representation of R's paired list-like structures (`LISTSXP`, `LANGSXP`).
Unlike R which represents these as singly-linked list,
`RPairList` uses vector representation.
Representation of R's single linked list-like structures
(`LISTSXP`, `LANGSXP`, 'DOTSXP').
Unlike R, which represents these as single linked list,
`RSpecialList` uses vector representation.
It is "special" because in R it is used to represent some internal data types.
"""
struct RPairList <: ROBJ{LISTSXP}
struct RSpecialList{S} <: ROBJ{S}
items::Vector{RSEXPREC}
tags::Vector{RString}
attr::Hash

RPairList(attr::Hash = Hash()) = new(RSEXPREC[], RString[], attr)
RSpecialList{S}(attr::Hash = Hash()) where S =
new{S}(RSEXPREC[], RString[], attr)
end

Base.length(list::RPairList) = length(list.items)
Base.length(list::RSpecialList) = length(list.items)
Base.size(list::RSpecialList) = size(list.items)
Base.isempty(list::RSpecialList) = isempty(list.items)

function Base.push!(pl::RPairList, item::RSEXPREC, tag::RString)
function Base.push!(pl::RSpecialList, item::RSEXPREC, tag::RString)
push!(pl.tags, tag)
push!(pl.items, item)
end

mutable struct RClosure <: ROBJ{CLOSXP}
formals
body
env
attr::Hash

RClosure(attr::Hash = Hash()) = new(nothing, nothing, nothing, attr)
end

struct RBuiltin <: RSEXPREC{BUILTINSXP}
internal_function::RString
end
const RPairList = RSpecialList{LISTSXP}
const RLang = RSpecialList{LANGSXP}
const RDot = RSpecialList{DOTSXP}

mutable struct RPromise <: ROBJ{PROMSXP}
value
expr
env
struct RClosure <: ROBJ{CLOSXP}
formals::RSEXPREC
body::RSEXPREC
env::REnvTypes
attr::Hash

RPromise(attr::Hash = Hash()) = new(nothing, nothing, nothing, attr)
end

mutable struct REnvironment <: ROBJ{ENVSXP}
enclosed
frame
hashtab
struct RPromise <: ROBJ{PROMSXP}
value::RSEXPREC
expr::RSEXPREC
env::REnvTypes
attr::Hash
end

REnvironment() = new(nothing, nothing, nothing, Hash())
struct RBuiltin <: RSEXPREC{BUILTINSXP}
internal_function::RString
end

struct RRaw <: ROBJ{RAWSXP}
Expand Down Expand Up @@ -223,24 +255,12 @@ mutable struct RBytecode <: ROBJ{BCODESXP}
new(attr, nothing, code, consts)
end

struct RPackage <: RSEXPREC{PACKAGESXP}
name::Vector{RString}
end

struct RNamespace <: RSEXPREC{NAMESPACESXP}
name::Vector{RString}
end

struct RAltRep <: ROBJ{ALTREP_SXP}
info
state
attr::Hash
end

# R objects without body (empty environments, missing args etc)
struct RDummy{S} <: RSEXPREC{S}
end

##############################################################################
##
## Utilities for working with basic properties of R objects:
Expand Down Expand Up @@ -278,10 +298,6 @@ inherits(ro::ROBJ, classnames::AbstractVector{<:AbstractString}) =
isdataframe(rl::RList) = inherits(rl, "data.frame")
isfactor(ri::RIntegerVector) = inherits(ri, "factor")

Base.length(rl::RVEC) = length(rl.data)
Base.size(rv::RVEC) = length(rv.data)
Base.size(rl::RList) = isdataframe(rl) ? (length(rl.data[1]), length(rl.data)) : length(rl.data)

row_names(ro::ROBJ) = getattr(ro, "row.names", emptystrvec)

altrep_typename(ar::RAltRep) =
Expand Down
1 change: 1 addition & 0 deletions test/DictoVec.jl
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ end

@test dv == DictoVec([2.0, 5.0, 4.0])
@test dv == DictoVec([2, 5, 4])
@test dv == DictoVec([2, 5, 4], String[])
@test isequal(dv, DictoVec([2.0, 5.0, 4.0]))
@test dv != DictoVec([3.0, 5.0, 4.0])
@test !isequal(dv, DictoVec([3.0, 5.0, 4.0]))
Expand Down
Loading

0 comments on commit 9d0d02e

Please sign in to comment.