Goal directed behavior under programattic control |
|||||||||
|
Download: prognav.jar (after saving the file locally, the components can be imported with the import... option from the File menu). This model implements a diffusion-like algorithm for goal directed navigation similar to that presented by Hasselmo (2005) [A model of prefrontal cortical mechanisms for goal directed behavior. Journal of Cognitive Neuroscience, 17(7):1115-29] (PDF and Matlab scripts on Hasselmo lab website).
At this stage, the maze implements the behavior of detecting when the food is taken, and the main experiment model defines the experiment: the rat is just move back to start point each time it gets the food. The rest of the behavior all belongs in the control script of the rat.
Navigator ModuleThe Navigator module uses three classes: Navigator.java, State.java and Action.java, which are embedded below for reference. Its purpose is to generate the next action wherever the rat is and to remember actions that are not allowed. The navigator only ever generates steps of a single fixed length in one of the four directions north, south, east and west, so the rat can only ever occupy the vertices of a square grid in the environment. When it visits a new vertex, it creates a new State object to represent that vertex, and each time it makes a new step between vertices, it creates a new Action object for that particular action. This is recorded in both the destination state (as a possible incoming action) and the source state (as a possible outgoing action). When it gets a reward, the corresponding state is put in a list of goals. Under goal directed navigation, the decision process consists of setting a "value" quantity in the goal state(s) and then propagating backwards along the allowed incoming actions, decrementing the value a little bit each time. The propagation continues until all accessible states have been processed. From its current state, the rat then takes the action that leads to the state with the highest value. Under exploratory behavior it generates random values for neighboring states. It can also operate in a mixed exploratory and goal-directed mode where a random element is added to the propagated values, with the ration set by the "goalFactor". In the example model this is changed from 0 (full exploration) to 1 (goal only) after getting the reward five times. State.javapackage org.neurostruct.script.devel;
import java.util.HashSet;
import java.util.Iterator;
public class State {
HashSet outActions = new HashSet();
HashSet inActions = new HashSet();
double value;
boolean bdone = false;
public State() {
outActions.add(new Action(this, 0.));
outActions.add(new Action(this, 90.));
outActions.add(new Action(this, 180));
outActions.add(new Action(this, 270.));
}
void remove(Action action) {
outActions.remove(action);
}
void updateSource(Action a) {
if (inActions.contains(a)) {
// nothing to do;
} else {
inActions.add(a);
a.setTo(this);
if (inActions.size() > 4) {
// E.info("too many ways to get here? " + this);
}
}
}
void clearValue() {
value = 0;
bdone = false;
}
public void setValue(double d) {
value = d;
bdone = true;
}
public boolean doneValue() {
return bdone;
}
public double getValue() {
return value;
}
public double[] getValueRange() {
double vmin = 0.;
double vmax = 1.;
for (Iterator it = outActions.iterator(); it.hasNext(); ) {
Action a = (Action)it.next();
double v = a.getValue();
if (v > vmax) {
vmax = v;
}
if (v < vmin) {
vmin = v;
}
}
double[] ret = {vmin, vmax};
return ret;
}
public Iterator getInActionsIterator() {
return inActions.iterator();
}
public Iterator getActionsIterator() {
return outActions.iterator();
}
}
Action.javapackage org.neurostruct.script.devel;
public class Action {
double bearing;
State from;
State to;
public Action(State sas, double b) {
from = sas;
bearing = b;
}
public void setTo(State sas) {
if (to == null) {
to = sas;
} else {
// E.info("error - overwriting to? " + bearing);
}
}
State getFrom() {
return from;
}
State getTo() {
return to;
}
double getBearing() {
return bearing;
}
double getValue() {
double ret = 0.;
if (to != null) {
ret = (to.getValue() - from.getValue());
}
return ret;
}
public boolean sameDirectionAs(Action lastAction) {
boolean ret = false;
if (lastAction != null && Math.abs(bearing - lastAction.bearing) < 5.) {
ret = true;
}
return ret;
}
}
Navigator.javapackage org.neurostruct.script.devel;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import org.catacomb.be.Position;
import org.catacomb.report.E;
public class Navigator {
HashMap sasHM = new HashMap();
double stepLength;
HashSet goals = new HashSet();
Action lastAction;
State[] stateQueue = new State[10000];
int errno = 0;
Position lpos;
static final double EPS = 1.e-6; // small number, not important exactly how small;
public double goalFactor; // 0 for random exploration, 1 for totally goal directed
public Navigator() {
stepLength = 1.;
goalFactor = 0.5;
}
public void setStepLength(double d) {
stepLength = d;
}
public void setGoalFactor(double d) {
goalFactor = d;
}
public double getNextStepBearing(Position pos) {
// get an object containing the possible actions for this position;
State sas = getStateActionSet(pos);
if (goals.contains(sas)) {
E.info("fishy - already at goal? " + sas);
}
lpos = pos.copy();
// if there is a known goal, propagate back through the actions
// incrementing the values of the actions that lead to it, and
// then the ones that lead to them and so on
Collection vals = sasHM.values();
for (Iterator it = sasHM.values().iterator(); it.hasNext(); ) {
((State)it.next()).clearValue();
}
int nsas = 0;
for (Iterator it = goals.iterator(); it.hasNext(); ) {
State g = (State)(it.next());
g.setValue(1.);
stateQueue[nsas++] = g;
}
queuePropagate(stateQueue, nsas);
double[] valueRange = sas.getValueRange();
double dvr = valueRange[1] - valueRange[0];
if (dvr < EPS) {
dvr = EPS;
}
double vmin = valueRange[0];
double vmax = -1.;
Action atake = null;
for (Iterator it = sas.getActionsIterator(); it.hasNext(); ) {
Action a = (Action)it.next();
double v = a.getValue();
double ra = goalFactor * (v - vmin) / dvr + (1. - goalFactor) * Math.random();
if (ra > vmax) {
vmax = ra;
atake = a;
}
}
// update the states weight for the action that got us here
if (lastAction != null) {
sas.updateSource(lastAction);
}
lastAction = atake;
return atake.getBearing();
}
public void disableAttemptedAction() {
lastAction.getFrom().remove(lastAction);
}
public void reachedReward(Position pos) {
State sas = getStateActionSet(pos);
sas.updateSource(lastAction);
E.info("reached reward " + pos.getX() + " " + pos.getY() + " " + sas);
E.info("coming from " + lpos.getX() + " " + lpos.getY() + " " + lastAction.getBearing());
if (goals.contains(sas)) {
// already been there once - nothing more to do;
} else {
if (goals.size() > 0) {
E.info("duplicate goals??");
} else {
goals.add(sas);
//E.info("goal at " + pos.getX() + " " + pos.getY() + " " + goals.size());
}
}
}
private State getStateActionSet(Position pos) {
// this allocates place fields based on the position and
// step length. If the position has already been visited,
// the existing actions are returned, otherwise a new set
// is made.
long ix = Math.round(pos.getX() / stepLength);
long iy = Math.round(pos.getY() / stepLength);
String key = "" + ix + ":" + iy;
State ret = null;
if (sasHM.containsKey(key)) {
ret = (State)sasHM.get(key);
} else {
ret = new State();
sasHM.put(key, ret);
}
return ret;
}
private void queuePropagate(State[] sq, int nInQueue) {
int isq = 0;
int nsq = nInQueue;
while (isq < nsq) {
State csas = stateQueue[isq];
double d = csas.getValue() - 0.01;
for (Iterator it = csas.getInActionsIterator(); it.hasNext(); ) {
Action a = (Action)it.next();
State src = a.getFrom();
if (src.doneValue()) {
// already done;
} else {
src.setValue(d);
stateQueue[nsq++] = src;
}
}
isq += 1;
}
}
}
|
|||||||||




