| Safe Haskell | None |
|---|---|
| Language | GHC2021 |
# RL.DQN

## Documentation

gamma :: forall (dev :: (DeviceType, Nat)). KnownDevice dev => QTensor dev ('[] :: [Nat])
learningRate :: forall (dev :: (DeviceType, Nat)). IsValidDevice dev => Double -> LearningRate dev QDType
bufferSize :: Int
data DQNState (dev :: (DeviceType, Nat)) opt tr tr' slc s f h (r :: k)
greedyPolicy :: forall m embedding (dev :: (DeviceType, Nat)). Applicative m => (embedding -> QTensor dev '[1]) -> [embedding] -> m Int
epsilonic :: StatefulGen gen m => gen -> QType -> ([embedding] -> m Int) -> [embedding] -> m Int
softmaxPolicy :: forall gen m embedding (dev :: (DeviceType, Nat)). StatefulGen gen m => gen -> (embedding -> QTensor dev '[1]) -> [embedding] -> m Int
runEpisode :: forall {k} (dev :: (DeviceType, Nat)) tr tr' slc slc' s f h (gen :: k) state action encoding step. (state ~ GreedyState tr tr' slc (Leftmost s f h), action ~ Action slc tr s f h, encoding ~ QEncoding dev ('[] :: [Nat]), step ~ (state, action, encoding, Maybe (state, [encoding]), Maybe Bool)) => Eval tr tr' slc slc' h (Leftmost s f h) -> (state -> action -> encoding) -> ([encoding] -> IO Int) -> Path slc' tr' -> IO (Either String ([step], Analysis s f h tr slc))
trainLoop :: forall (dev :: (DeviceType, Nat)) tr tr' slc slc' s f h gen opt {device :: (DeviceType, Nat)}. (GeluDTypeIsValid dev QDType, RandDTypeIsValid dev QDType, SumDTypeIsValid dev QDType, MeanDTypeValidation dev QDType, StandardFloatingPointDTypeValidation dev QDType, GeluDTypeIsValid device QDType, RandDTypeIsValid device QDType, BasicArithmeticDTypeIsValid device QDType, SumDTypeIsValid device QDType, MeanDTypeValidation device QDType, StandardFloatingPointDTypeValidation device QDType, BasicArithmeticDTypeIsValid dev 'Double, StatefulGen gen IO, Optimizer opt '[Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[EmbSize, 13, 5], Tensor dev 'Double '[EmbSize, 13, 5], Tensor dev 'Double '[8, 2, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 2, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[QTransHidden], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[5], Tensor dev 'Double '[5], Tensor dev 'Double '[5], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], 
Tensor dev 'Double '[8], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[1, 8], Tensor dev 'Double '[1], Tensor dev 'Double '[8, 8], Tensor dev 'Double '[8], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[1, 8], Tensor dev 'Double '[1]] '[Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[EmbSize, 13, 5], Tensor dev 'Double '[EmbSize, 13, 5], Tensor dev 'Double '[8, 2, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 2, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 1, 1, 1], Tensor dev 'Double '[8], Tensor dev 'Double '[QTransHidden], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[5], Tensor dev 'Double '[5], Tensor dev 'Double '[5], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[8, 8, 13, 5], Tensor dev 'Double '[8], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double '[1, 8], Tensor dev 'Double '[1], Tensor dev 'Double '[8, 8], Tensor dev 'Double '[8], Tensor dev 'Double '[QOutHidden], Tensor dev 'Double 
'[QOutHidden], Tensor dev 'Double '[1, 8], Tensor dev 'Double '[1]] QDType device, KnownDevice device, KnownDevice dev) => gen -> Eval tr tr' slc slc' h (Leftmost s f h) -> (GreedyState tr tr' slc (Leftmost s f h) -> Action slc tr s f h -> QEncoding dev ('[] :: [Nat])) -> (Analysis s f h tr slc -> IO QType) -> (Action slc tr s f h -> Maybe Bool -> IO QType) -> Path slc' tr' -> DQNState dev opt tr tr' slc s f h QType -> Int -> Int -> IO (DQNState dev opt tr tr' slc s f h QType, QType, QType)
trainDQN :: forall (dev :: (DeviceType, Nat)) gen tr tr' slc slc' s f h. (IsValidDevice dev, StatefulGen gen IO, Show s, Show f, Show h, s ~ Split SPitch, f ~ Freeze SPitch, h ~ Spread SPitch, Show slc, Show tr) => gen -> Eval tr tr' slc slc' h (Leftmost s f h) -> (GreedyState tr tr' slc (Leftmost s f h) -> Action slc tr s f h -> QEncoding dev ('[] :: [Nat])) -> (Analysis s f h tr slc -> IO QType) -> (Action slc tr s f h -> Maybe Bool -> IO QType) -> [Path slc' tr'] -> Int -> IO ([QType], [QType], QModel dev)