import React from "react";
import QLearningTable from './img/Q-learing-table.jpg'
import DeepQLearning from './img/Deep-Q-learning-node.jpg'
import deepQEnvironmentAgent from './img/deep_q_environment_agent.png'
import doubleDeepQFormula from './img/double_deep_q_formula.png'
import './IntroductionPage.css'
import { Container, Row, Col } from "reactstrap";

function IntroductionPage() {
    return (
        <Container>
            
            <h1>Deep Q-Networks</h1>
            <br />
            <br />

            <h6 className="Introduction-Q-and-deepQ-content">In deep Q-learning, we use a neural network to approximate the Q-value function.</h6>
            <h6 className="Introduction-Q-and-deepQ-content">The state is given as the input and the Q-value of all possible actions is generated as the output.</h6>
            <h6 className="Introduction-Q-and-deepQ-content">The comparison between Q-learning & deep Q-learning is wonderfully illustrated below:</h6>
            <Row>
                <Col md="9">
                    <img src={QLearningTable} alt="Q Learning And DeepQLearning" className="Introduction-img-max-size"/>
                </Col>
            </Row>
            <Row>
                <Col md="11">
                    <img src={DeepQLearning} alt="Q Learning And DeepQLearning" className="Introduction-img-max-size"/>
                </Col>
            </Row>
            <br />
            <br />
            <h5>Because of the huge amount of input, a neural network is used instead of a q-table</h5>
            <br />
            <br />
            <div className="imageSize">
                <img src={deepQEnvironmentAgent} alt="Deep Q Environment Agent" className="Introduction-img-max-size"/>
            </div>
            <br />
            <br />
            <Row>
                <Col md="1"/>
                <h6 className="Introduction-deepQ-content">By taking actions and receiving rewards from the environment,</h6>
                <h6>the agent can learn which actions are favorable in a given state.</h6>
                <h6> The goal of the agent is to maximize its cumulative expected reward.</h6>
                <h6>but the drawback is sometimes the A.I. overestimate the q value</h6>
                <h6>To solve this, we've applied double deep q learning</h6>
            </Row>
            <br />
            <h2>Double q learning</h2>

            <div className="imageSize" >
                <img src={doubleDeepQFormula} alt="Double Deep Q Formula" className="Introduction-img-max-size"/>
            </div>

            <h6>The solution is: when we compute the Q target, </h6><h6>
                we use two networks to decouple the action selection from the target Q value generation. </h6>
            <h6>We use our DQN network to select what is the best action to take for the next state</h6>
            <h6>use our other network to calculate the target Q value of taking that action at the next state.</h6>

        </Container>

    )
}

// The component is this module's default export.
export default IntroductionPage;