/**
 * Returns true iff we can cast `from` type to `to` type.
 *
 * NOTE(review): this excerpt elides the trailing cases of the match (see the marker at the
 * end of the body), so the conversions listed here are not the complete set.
 *
 * @param from the source data type
 * @param to   the target data type
 */
def canCast(from: DataType, to: DataType): Boolean = (from, to) match {
  // Identical types.
  case (fromType, toType) if fromType == toType => true

  case (NullType, _) => true

  // Anything can be cast to a string.
  case (_, StringType) => true

  case (StringType, BinaryType) => true

  // Casts to BooleanType.
  case (StringType, BooleanType) => true
  case (DateType, BooleanType) => true
  case (TimestampType, BooleanType) => true
  case (_: NumericType, BooleanType) => true

  // Casts to TimestampType.
  case (StringType, TimestampType) => true
  case (BooleanType, TimestampType) => true
  case (DateType, TimestampType) => true
  case (_: NumericType, TimestampType) => true

  // Casts to DateType.
  case (StringType, DateType) => true
  case (TimestampType, DateType) => true

  case (StringType, CalendarIntervalType) => true

  // Casts to numeric types.
  case (StringType, _: NumericType) => true
  case (BooleanType, _: NumericType) => true
  case (DateType, _: NumericType) => true
  case (TimestampType, _: NumericType) => true
  case (_: NumericType, _: NumericType) => true

  // ... (remaining cases elided in this excerpt)
}
1 2 3 4 5 6 7 8 9 10 11 12 13
/**
 * Builds the conversion function used to cast a value of type `from` to a long.
 *
 * Per source type, as visible in the cases below:
 *  - `StringType`: parses the string; yields `null` when `toLong` reports failure.
 *  - `BooleanType`: `true` becomes `1L`, `false` becomes `0L`.
 *  - `DateType`: always yields `null`.
 *  - `TimestampType`: delegates to `timestampToLong`.
 *  - any `NumericType`: converts through that type's `Numeric` instance.
 *
 * @param from the source data type of the value being cast
 * @return a function from the input value to the cast result (or `null`)
 */
private[this] def castToLong(from: DataType): Any => Any = from match {
  case StringType =>
    // Holder that `toLong` fills in; it is created once per cast function, not per row.
    val result = new LongWrapper()
    buildCast[UTF8String](_, s => if (s.toLong(result)) result.value else null)
  case BooleanType =>
    buildCast[Boolean](_, b => if (b) 1L else 0L)
  case DateType =>
    // The input date is ignored; the result is always null.
    buildCast[Int](_, d => null)
  case TimestampType =>
    buildCast[Long](_, t => timestampToLong(t))
  case x: NumericType =>
    b => x.numeric.asInstanceOf[Numeric[Any]].toLong(b)
}
1 2 3 4 5 6 7 8
// TimestampConverter
/**
 * Builds the conversion function used to cast a value of type `from` to a timestamp.
 *
 * NOTE(review): only the `DateType` case is shown in this excerpt; the cases before and
 * after it are elided (see the markers in the body).
 *
 * @param from the source data type of the value being cast
 */
private[this] def castToTimestamp(from: DataType): Any => Any = from match {
  // ... (earlier cases elided in this excerpt)
  case DateType =>
    // Convert the day count to milliseconds using `timeZone`, then scale by 1000.
    buildCast[Int](_, d => DateTimeUtils.daysToMillis(d, timeZone) * 1000)
  // TimestampWritable.decimalToTimestamp
  // ... (later cases elided in this excerpt)
}
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19
/**
 * Given a timestamp, which corresponds to a certain time of day in the given timezone, returns
 * another timestamp that corresponds to the same time of day in UTC.
 *
 * @param ts the timestamp column to convert
 * @param tz the time zone, passed to the underlying expression as a literal
 * @group datetime_funcs
 * @since 1.5.0
 */
def to_utc_timestamp(ts: Column, tz: String): Column = withExpr {
  ToUTCTimestamp(ts.expr, Literal(tz))
}
/**
 * Given a timestamp, which corresponds to a certain time of day in UTC, returns another timestamp
 * that corresponds to the same time of day in the given timezone.
 *
 * @param ts the timestamp column to convert
 * @param tz the time zone, passed to the underlying expression as a literal
 * @group datetime_funcs
 * @since 1.5.0
 */
def from_utc_timestamp(ts: Column, tz: String): Column = withExpr {
  FromUTCTimestamp(ts.expr, Literal(tz))
}
/**
 * Replace [[TimeZoneAwareExpression]] without timezone id by its copy with session local
 * time zone.
 */
case class ResolveTimeZone(conf: SQLConf) extends Rule[LogicalPlan] {
  private val transformTimeZoneExprs: PartialFunction[Expression, Expression] = {
    case e: TimeZoneAwareExpression if e.timeZoneId.isEmpty =>
      e.withTimeZone(conf.sessionLocalTimeZone)
    // Casts could be added in the subquery plan through the rule TypeCoercion while coercing
    // the types between the value expression and list query expression of IN expression.
    // We need to subject the subquery plan through ResolveTimeZone again to setup timezone
    // information for time zone aware expressions.
    case e: ListQuery => e.withNewPlan(apply(e.plan))
  }
  // NOTE(review): the excerpt ends here. The rest of the class, including the `apply`
  // invoked above and the closing brace, is not shown.
/**
 * Mix-in trait for constructing valid [[Cast]] expressions.
 */
trait CastSupport {
  /**
   * Configuration used to create a valid cast expression.
   */
  def conf: SQLConf

  /**
   * Create a Cast expression with the session local time zone.
   *
   * @param child    the expression to cast
   * @param dataType the target data type
   * @return a [[Cast]] carrying `conf.sessionLocalTimeZone` as its time zone id
   */
  def cast(child: Expression, dataType: DataType): Cast = {
    Cast(child, dataType, Option(conf.sessionLocalTimeZone))
  }
}
// Build a single-row batch by hand; `schema` is defined outside this excerpt.
val columnarBatch = ColumnarBatch.allocate(schema, MemoryMode.ON_HEAP, 1024)
val c0 = columnarBatch.column(0)
val c1 = columnarBatch.column(1)
val c2 = columnarBatch.column(2)

c0.putInt(0, 0)
// 1355241600, /3600/24 s to days
c1.putInt(0, 1355241600 / 3600 / 24)
// microsecond
c2.putLong(0, 1355285532000000L)

val internal0 = columnarBatch.getRow(0)

// a way converting internal row to unsafe row.
// val convert = UnsafeProjection.create(schema)
// val internal = convert.apply(internal0)

val enc = RowEncoder.apply(schema).resolveAndBind()
val row = enc.fromRow(internal0)
val df = spark.createDataFrame(Lists.newArrayList(row), schema)

// The JVM default time zone is process-wide state: remember it and restore it afterwards
// so this check does not leak a changed default into other code.
val savedDefaultTimeZone = TimeZone.getDefault
try {
  TimeZone.setDefault(TimeZone.getTimeZone("UTC"))
  val tsStr0 = df.select(col("time")).head().getTimestamp(0).toString
  val ts0 = df.select(col("time").cast(LongType)).head().getLong(0)

  TimeZone.setDefault(TimeZone.getTimeZone("GMT+8"))
  val tsStr1 = df.select(col("time")).head().getTimestamp(0).toString
  val ts1 = df.select(col("time").cast(LongType)).head().getLong(0)

  // The condition must be the first argument of `assert`; passing `true` first makes the
  // check vacuous because the second argument is only the failure message.
  assert(tsStr0 == "2012-12-12 04:12:12.0")
  assert(tsStr1 == "2012-12-12 12:12:12.0")
  // After casting to long, the numeric value is the same under both default time zones.
  assert(ts0 == ts1)
} finally {
  TimeZone.setDefault(savedDefaultTimeZone)
}
} // closes the enclosing body, whose header is not part of this excerpt
// For expressions that declare expected input types, try to implicitly cast each child to
// the corresponding expected type.
case e: ImplicitCastInputTypes if e.inputTypes.nonEmpty =>
  val children: Seq[Expression] = e.children.zip(e.inputTypes).map { case (in, expected) =>
    // If we cannot do the implicit cast, just use the original input.
    implicitCast(in, expected).getOrElse(in)
  }
  e.withNewChildren(children)
/**
 * Builds the conversion function used to cast a value of type `from` to a date.
 *
 * Only `StringType` and `TimestampType` sources are handled by this match.
 *
 * @param from the source data type of the value being cast
 * @return a function from the input value to the cast result; for strings the result is
 *         `null` when `DateTimeUtils.stringToDate` yields no value
 */
private[this] def castToDate(from: DataType): Any => Any = from match {
  case StringType =>
    buildCast[UTF8String](_, s => DateTimeUtils.stringToDate(s).orNull)
  case TimestampType =>
    // throw valid precision more than seconds, according to Hive.
    // Timestamp.nanos is in 0 to 999,999,999, no more than a second.
    buildCast[Long](_, t => DateTimeUtils.millisToDays(t / 1000L, timeZone))
}