Quote
Anyway, that's an idea, I'll post it soon.
Thanx ZekeDragon, it would be great if you'd post it when it's finished.
Anyway, I have found one (giant) solution now. The method is huge but it does miracles and I have still not been able to find any faults in it. I'm not great in programming with low complexity (I'm actually also quite lousy on figuring the complexity out, so I won't tell you if it's fast or not) but fact is, that I'm trying this solution on about 1200 pages of dictionary-pages and it works like a charm.
The idea is to take out the longest sorted substring of the list and then take it from there.
I think the comments should explain the whole process
/**
* Check if the lemmas are in alphabetic order, remove the words that have been recognized as lemmas but aren't
*/
private void checkLemmas(){
//{ABZDEFGPGKLOPVRSTAUH} -> {{ABZ}, {DEFG}, {P}, {GKLOPV}, {RST}. {AU}, {H}}
ArrayList<ArrayList<String>> subLists = new ArrayList<ArrayList<String>>();
ArrayList<String> temp = new ArrayList<String>();
for(int i = 0; i<lemmas.size(); i++){
String possible = lemmas.get(i).getLexeme().toLowerCase();
if(temp.isEmpty()){ //behöver jag inte ha sen när jag har basfallet!
temp.add(possible);
}
else if(possible.compareTo(temp.get(temp.size()-1))>0){
temp.add(possible);
}
else{
ArrayList<String> temp2 = new ArrayList<String>();
temp2.addAll(temp);
subLists.add(temp2);
temp.clear();
temp.add(possible);
}
}
ArrayList<String> temp2 = new ArrayList<String>();
temp2.addAll(temp);
subLists.add(temp2);
temp.clear();
if(subLists.size() == 1)
System.out.println("SOOORTED!!!");
//get the longest substring - {GKLOPV}
ArrayList<String> longest = new ArrayList<String>();
for(int i = 0; i<subLists.size(); i++){
if(longest.isEmpty()){
longest.addAll(subLists.get(i));
}
else if(subLists.get(i).size() > longest.size()){
longest.clear();
longest.addAll(subLists.get(i));
}
}
//here we compare our longest substring to the string before and after
//we remove the "intruders"
//and concat the before-string to the longest, and the after-string to the longest
//explanations are to find in the comments.
//ex. subLists: {{ABZ}, {DEFG}, {P}, {CKLOPV}, {RST}, {AU}, {H}}
while(subLists.size() != 1){
int i = subLists.indexOf(longest);
//if longest just contains one element, we don't have a pattern, no use trying, only occurred once in a file of 700 pages.
if(longest.size() == 1)
break;
//if there are sublists left to the longest sublist
if(i != 0){
ArrayList<String> before = subLists.get(i-1);
//{P}, {CKLOPV}
if(before.size() == 1){
//if P < K - remove G
if(before.get(0).compareTo(longest.get(1))<0){
longest.remove(0);
subLists.get(i).remove(0);
longest.addAll(0, before);
//I've been having troubled with my variables
subLists.get(i).addAll(0, before);
subLists.remove(before);
}
//if P > K - remove P -> {{ABZ}, {DEFG}, {CKLOPV}, {RST}. {AU}}
else {
subLists.remove(before);
}
}
//{{ABZ}, {DEFG}, {CKLOPV}, {RST}, {AU}, {H}}
else{
for(int depth = 1, depth1 = 0; depth <= longest.size()
&& depth1<longest.size();depth++){
//depth = size of longest or max 4 - max can be changed, but I still haven't encountered enough reasons why
if(depth == longest.size()){
depth1++;
depth = 1;
}
if(depth == 4){
depth1++;
depth = 1;
}
//depth1 sets in when depth isn't enough, if depth1 ha passed 2 (can also go deeper if one finds it necessary)
//then we just skips the sublist before
if(depth1 == 2){
subLists.remove(before);
break;
}
//{DEFG}, {CKLOPV}
//if G (in before) < K - remove C -> {DEFGKLOPV}
if(depth1<before.size() && before.get(before.size()-(depth1+1)).
compareTo(longest.get(depth))<0){
for(int k = 0; k<depth; k++){
longest.remove(0);
subLists.get(i).clear();
subLists.get(i).addAll(longest);
}
//if we would have had to go on depth1 >0
for(int l = 0; l<depth1; l++){
subLists.get(i-1).remove(before.size()-1);
}
longest.addAll(0, before);
subLists.get(i).addAll(0, before);
subLists.remove(before);
break;
}
//{ABZ}, {DEFGKLOPRST}
//if B < D - remove Z -> {AB} + {DEFGKLOPRST} -> {{ABDEFGKLOPRST}}
else if(depth<before.size() && before.get(before.size()-(depth+1)).
compareTo(longest.get(depth1))<0){
for(int k = 0; k<depth; k++){
subLists.get(i-1).remove(before.size()-1);
}
//if we would have had to go on depth1 >0
for(int l = 0; l<depth1; l++){
longest.remove(0);
subLists.get(i).clear();
subLists.get(i).addAll(longest);
}
longest.addAll(0, subLists.get(i-1));
subLists.get(i).addAll(0, subLists.get(i-1));
subLists.remove(before);
break;
}
}
}
}
else{
System.out.println("left side finished: " + subLists);
}
i = subLists.indexOf(longest);
//if there are sublists right to the longest one
if(i != subLists.size()-1){
ArrayList<String> after = subLists.get(i+1);
//{{ABDEFGKLOPRSTU}, {H}}
if(after.size() == 1){
//if H > T - remove U (false)
if(after.get(0).compareTo(longest.get(longest.size()-2))>0){
subLists.get(i).remove(longest.size()-1);
longest.remove(longest.size()-1);
longest.addAll(after);
subLists.get(i).addAll(after);
subLists.remove(after);
}
//if H < T - remove H -> {{ABDEFGKLOPRSTU}}
else {
subLists.remove(after);
}
}
else{
//{{ABZ}, {DEFGKLOPV}, {RST}, {AU}, {H}}
for(int depth = 1, depth1 = 0; depth <= longest.size() && depth1<longest.size();depth++){
//depth = size of longest or max 4 - max can be changed, but I still haven't encountered enough reasons why
if(depth == longest.size()){
depth1++;
depth = 1;
}
if(depth == 4){
depth1++;
depth = 1;
}
//depth1 sets in when depth isn't enough, if depth1 ha passed 2 (can also go deeper if one finds it necessary)
//then we just skips the sublist before
if(depth1 == 2){
subLists.remove(after);
break;
}
// {ABDEFGKLOPV}, {RST}
//if S > V - remove R (false)
// {ABDEFGKLOPRST}, {AU}
//if U > T - remove A -> {{ABDEFGKLOPRST} + {U}} -> {{ABDEFGKLOPRSTU}}
if(depth<after.size() && after.get(depth).
compareTo(longest.get(longest.size()-(depth1+1)))>0){
for(int k = 0; k<depth; k++){
subLists.get(i+1).remove(0);
}
for(int l = 0; l<depth1; l++){
longest.remove(longest.size()-1);
subLists.get(i).clear();
subLists.get(i).addAll(longest);
}
longest.addAll(after);
subLists.get(i).addAll(after);
subLists.remove(after);
break;
}
// {DEFGKLOPV}, {RST}
//if R > P - remove V -> {DEFGKLOP} + {RST} -> {DEFGKLOPRST}
else if(depth1<after.size() && after.get(depth1).
compareTo(longest.get(longest.size()-(depth+1)))>0){
for(int k = 0; k<depth; k++){
longest.remove(longest.size()-1);
subLists.get(i).clear();
subLists.get(i).addAll(longest);
}
for(int l = 0; l<depth1; l++){
subLists.get(i+1).remove(0);
}
longest.addAll(subLists.get(i+1));
subLists.get(i).addAll(subLists.get(i+1));
subLists.remove(subLists.get(i+1));
break;
}
}
}
}
else{
System.out.println("right side finished " + subLists);
}
}
if(subLists.size() == 1)
System.out.println("/while - finished SORTING!");
}
If anyone has an idea for making it even more effective and great, please tell :)