Created
November 12, 2020 12:56
-
-
Save pbloem/c5a815759bebb4b0b3ada63fdb47f98d to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package org.submassive; | |
import java.io.BufferedInputStream; | |
import java.io.BufferedReader; | |
import java.io.File; | |
import java.io.FileInputStream; | |
import java.io.FileReader; | |
import java.io.IOException; | |
import java.io.InputStream; | |
import java.io.InputStreamReader; | |
import it.unimi.dsi.webgraph.labelling.ArcLabelledImmutableGraph; | |
import it.unimi.dsi.webgraph.labelling.ArcLabelledNodeIterator.LabelledArcIterator; | |
import it.unimi.dsi.webgraph.labelling.GammaCodedIntLabel; | |
import it.unimi.dsi.webgraph.labelling.Label; | |
import java.util.ArrayList; | |
import java.util.Deque; | |
import java.util.Iterator; | |
import java.util.LinkedList; | |
import java.util.List; | |
import java.nio.file.Files; | |
import java.util.stream.*; | |
class ShuaiLoader extends ArcLabelledImmutableGraph { | |
private File triples; | |
final private GammaCodedIntLabel prototype; | |
final private List<Integer> outdegrees = new ArrayList<Integer>(); | |
private class Triple | |
{ | |
private final int a, b, c; | |
public Triple(int a, int b, int c) | |
{ | |
this.a = a; this.b = b; this.c = c; | |
} | |
public int a() {return a;} | |
public int b() {return b;} | |
public int c() {return c;} | |
} | |
/** | |
* Iterate over all triples in the source file. | |
* | |
* @author peter | |
* | |
*/ | |
private class TripleIterator implements Iterator<Triple> | |
{ | |
private final int BUFFER_SIZE = 5000; | |
private BufferedReader input; | |
private Deque<Triple> buffer = new LinkedList<Triple>(); | |
public TripleIterator() | |
{ | |
try { | |
this.input = new BufferedReader(new FileReader(triples)); | |
} catch(IOException e) | |
{ | |
throw new RuntimeException(e); | |
} | |
} | |
public boolean hasNext() | |
{ | |
check(); | |
return ! buffer.isEmpty(); | |
} | |
public Triple next() | |
{ | |
check(); | |
return buffer.pop(); | |
} | |
private void check() | |
{ | |
try { | |
while(buffer.size() < BUFFER_SIZE) | |
{ | |
String line = input.readLine(); | |
String[] elem = line.trim().split("\\s+", 3); | |
assert elem.length == 3; | |
int a = Integer.parseInt(elem[0]); | |
int b = Integer.parseInt(elem[1]); | |
int c = Integer.parseInt(elem[2]); | |
buffer.add(new Triple(a, b, c)); | |
} | |
} catch(IOException e) | |
{ | |
throw new RuntimeException(e); | |
} | |
} | |
} | |
public ShuaiLoader(File triples) | |
{ | |
this.triples = triples; | |
this.prototype = new GammaCodedIntLabel("FOO"); | |
// - Not entirely sure what this does, but it works for the IntegerTriples example. | |
// - Loop to count the outdegrees | |
Iterator<Triple> it = new TripleIterator(); | |
while(it.hasNext()) | |
{ | |
Triple t = it.next(); | |
int a = t.a(); int b = t.b(); int c = t.c(); | |
ensure(outdegrees, a); | |
outdegrees.set(a, outdegrees.get(a) + 1); | |
} | |
} | |
/** | |
* Ensure that the given list has the given capacity (fill with zeros) | |
* @param list | |
* @param max | |
*/ | |
private void ensure(List<Integer> list, int max) | |
{ | |
while(list.size() < max + 1) | |
list.add(0); | |
} | |
@Override | |
public ArcLabelledImmutableGraph copy() | |
{ | |
return new ShuaiLoader(triples); | |
} | |
@Override | |
public LabelledArcIterator successors(int x) | |
{ | |
/** | |
* This function is the only sticking point. It seems we have to be able to generate all successors (outgoing neighbors) | |
* of a given node on the fly. | |
* | |
* If we make sure that the triples in the file are sorted by source node, I think we can compute the starting point (in nr of | |
* bytes into the file) or each node and start the TripleTierator from there. | |
* | |
*/ | |
return null; | |
} | |
@Override | |
public Label prototype() | |
{ | |
return prototype; | |
} | |
@Override | |
public int numNodes() | |
{ | |
return outdegrees.size(); | |
} | |
@Override | |
public boolean randomAccess() | |
{ | |
return false; | |
// -- No random access allowed (only iteration) | |
} | |
@Override | |
public int outdegree(int i) | |
{ | |
return outdegrees.get(i); | |
} | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment